Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .clang-format-ignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ be/src/apache-orc/*
be/src/clucene/*
be/src/gutil/*
be/src/glibc-compatibility/*
be/src/macos_patches/*
be/src/util/sse2neo.h
be/src/util/sse2neon.h
be/src/util/mustache/mustache.h
Expand Down
61 changes: 47 additions & 14 deletions .github/workflows/be-ut-mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,23 @@ jobs:
- 'gensrc/proto/**'
- 'gensrc/thrift/**'

- name: Ccache ${{ github.ref }}
- name: Free disk space
if: ${{ github.event_name == 'schedule' || steps.filter.outputs.be_changes == 'true' }}
uses: ./.github/actions/ccache-action
with:
key: BE-UT-macOS
max-size: "5G"
restore-keys: BE-UT-macOS-
run: |
# Remove unused Xcode versions to free ~10 GB
sudo rm -rf /Applications/Xcode_15*.app /Applications/Xcode_14*.app /Applications/Xcode_13*.app || true
# Remove iOS/tvOS/watchOS simulator runtimes (~5 GB)
sudo rm -rf /Library/Developer/CoreSimulator/Profiles/Runtimes || true
sudo rm -rf ~/Library/Developer/Xcode/iOS\ DeviceSupport || true
# Remove GitHub Actions hosted tool cache (Python/Node/Go/etc, ~10 GB)
sudo rm -rf /Users/runner/hostedtoolcache || true
sudo rm -rf /opt/hostedtoolcache || true
# Remove Android SDK / dotnet / Swift toolchains we don't use
sudo rm -rf /Users/runner/Library/Android || true
sudo rm -rf /usr/local/share/dotnet || true
sudo rm -rf /Library/Developer/Toolchains || true
brew cleanup --prune=all || true
df -h /

- name: Run UT ${{ github.ref }}
if: ${{ github.event_name == 'schedule' || steps.filter.outputs.be_changes == 'true' }}
Expand All @@ -77,24 +87,47 @@ jobs:
'gettext'
'wget'
'pcre'
'openjdk@11'
'openjdk@17'
'maven'
'node'
'llvm@16'
)
brew install "${cellars[@]}" || true

pushd thirdparty
arch="$(uname -m)"
[[ "${arch}" == 'aarch64' ]] && arch='arm64'
file="doris-thirdparty-prebuilt-darwin-${arch}.tar.xz"
branch="${{ github.base_ref }}"
if [[ -z "${branch}" ]] || [[ "${branch}" == 'master' ]]; then
curl -L https://github.com/apache/doris-thirdparty/releases/download/automation/doris-thirdparty-prebuilt-darwin-x86_64.tar.xz \
-o doris-thirdparty-prebuilt-darwin-x86_64.tar.xz
curl -L "https://github.com/apache/doris-thirdparty/releases/download/automation/${file}" -o "${file}"
else
curl -L "https://github.com/apache/doris-thirdparty/releases/download/automation-${branch/branch-/}/doris-thirdparty-prebuilt-darwin-x86_64.tar.xz" \
-o doris-thirdparty-prebuilt-darwin-x86_64.tar.xz
curl -L "https://github.com/apache/doris-thirdparty/releases/download/automation-${branch/branch-/}/${file}" -o "${file}"
fi
tar -xvf doris-thirdparty-prebuilt-darwin-x86_64.tar.xz
tar -xf "${file}"
rm -f "${file}"
popd

export JAVA_HOME="${JAVA_HOME_17_X64%\/}"
./run-be-ut.sh --run -j "$(nproc)" --clean
export JAVA_HOME="$(brew --prefix openjdk@17)/libexec/openjdk.jdk/Contents/Home"
# Use RELEASE build instead of ASAN on macOS:
# 1) ASAN deadlocks at startup on macOS 15+ due to a dyld bug
# (dyld_shared_cache_iterate_text_swift), so ASAN tests cannot run.
# 2) ASAN compilation is 2-3x slower; RELEASE fits within the
# GitHub Actions 6-hour job limit.
export BUILD_TYPE_UT=RELEASE
# RELEASE defines -DNDEBUG which disables internal self-check
# methods (column_self_check, check_type_and_column, ...). Several
# tests rely on these checks returning errors. Undefine NDEBUG so
# the test behaviour matches the Linux ASAN_UT build (which also
# has NDEBUG undefined).
export EXTRA_CXX_FLAGS=-UNDEBUG
# Disable ccache: GitHub macOS runners have ~14 GB free disk and
# the local ccache (default 5 GB) competes with build artifacts.
# Since cache is not restored across runs (different build types
# and the prebuilt thirdparty already consumes ~4 GB), hit rate
# is effectively 0% and ccache only adds disk/IO overhead.
export CCACHE_DISABLE=1
# Skip tests that require >4 GB of memory; macos-15 runners have
# only 7 GB RAM and these tests OOM-crash the test binary.
./run-be-ut.sh --run -j "$(nproc)" --clean \
-f '-*test_sink_large_string_data_over_4g*'
1 change: 1 addition & 0 deletions .licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ header:
- "be/src/util/sse2neo.h"
- "be/src/util/sse2neon.h"
- "be/src/util/utf8_check.cpp"
- "be/src/macos_patches/**"
- "be/src/pch/*"
- "be/test/data"
- "be/test/expected_result"
Expand Down
54 changes: 54 additions & 0 deletions be/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,21 @@ set(USE_STAT64 0)
set(USE_BTHREAD OFF)

# Out of source build need to set the binary dir
# On macOS, clucene's src/ext/for builds bitpack_*.o via add_custom_command that
# invokes the C compiler directly with c_flags_list (derived from CMAKE_C_FLAGS).
# Such raw invocations do not pick up CMAKE_OSX_SYSROOT, so the SDK headers
# (stdio.h, ...) are not found. Inject -isysroot into CMAKE_C_FLAGS for the
# clucene subtree only, then restore.
if (APPLE)
execute_process(COMMAND xcrun --show-sdk-path
OUTPUT_VARIABLE MACOS_SDK_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
set(_saved_cmake_c_flags "${CMAKE_C_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -isysroot ${MACOS_SDK_PATH}")
endif()
add_subdirectory(${CONTRIB_PATH}/clucene ${PROJECT_BINARY_DIR}/clucene EXCLUDE_FROM_ALL)
if (APPLE)
set(CMAKE_C_FLAGS "${_saved_cmake_c_flags}")
endif()

set(clucene_options -w -Wall)
if (COMPILER_CLANG)
Expand Down Expand Up @@ -337,6 +351,14 @@ add_compile_options(-g
-fno-omit-frame-pointer
$<$<COMPILE_LANGUAGE:CXX>:-Wnon-virtual-dtor>)

if (OS_MACOSX)
# macOS CI runners (GitHub Actions macos-15) have 7 GB RAM. Linking the
# ~2 GB doris_be_test binary with full DWARF debug info triggers OOM in
# ld/dsymutil. Disable debug info on macOS; crash diagnosis can be done
# locally where memory is not constrained.
add_compile_options(-g0)
endif()

add_compile_options(-Wno-unused-parameter
-Wno-sign-compare)

Expand Down Expand Up @@ -378,6 +400,13 @@ if (COMPILER_CLANG)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-stdlib=libc++>)
add_definitions(-DUSE_LIBCPP)
endif()
if (OS_MACOSX)
# Several unscoped enum values in Doris headers (bitmap_value.h BITMAP,
# memtable_memory_limiter.h NONE/SOFT/HARD) shadow values in protobuf-
# generated headers. Clang@18 started detecting these. Suppress on macOS
# for local UT builds; the test binary already suppresses via -Wno-shadow.
add_compile_options(-Wno-shadow -Wno-shadow-field)
endif()
endif ()

add_definitions(-D__STDC_FORMAT_MACROS
Expand Down Expand Up @@ -512,6 +541,13 @@ include_directories(
${SRC_DIR}/
)

if (OS_MACOSX)
# Apple SDK lazy_split_view.h bug: __outer_iterator/__inner_iterator are forward-declared
# private but defined public, causing a hard error in Apple Clang 17. Patch fixes
# the forward declarations by moving them to the public section.
include_directories(BEFORE ${SRC_DIR}/macos_patches)
endif()

include_directories(
SYSTEM
${COMMON_SRC_DIR}
Expand Down Expand Up @@ -915,6 +951,13 @@ if (ENABLE_PCH)
endif()
if (COMPILER_CLANG)
target_compile_options(pch PRIVATE -Xclang -fno-pch-timestamp)
if (OS_MACOSX)
# Suppress shadow warnings in PCH, consistent with doris_be_test's own compile options.
# bitmap_value.h has an unscoped enum value 'BITMAP' that shadows the one in
# olap_file.pb.h; Apple Clang detects this and fails the PCH compile. Scope this
# to macOS so Linux clang builds keep full shadow-warning coverage.
target_compile_options(pch PRIVATE -Wno-shadow -Wno-shadow-field)
endif()
endif()
endif()

Expand All @@ -933,6 +976,17 @@ add_subdirectory(${SRC_DIR}/exprs)
add_subdirectory(${SRC_DIR}/format)
add_subdirectory(${SRC_DIR}/gen_cpp)
add_subdirectory(${SRC_DIR}/io)
if(DISABLE_ANN)
# Propagate the flag as a C++ preprocessor macro so headers can guard
# FAISS-specific includes (e.g. function_array_distance.h, ann_index_writer.cpp).
# Must be set BEFORE add_subdirectory(storage/index/ann) so that the ann_index
# library itself sees the flag.
add_compile_definitions(DISABLE_ANN)
endif()
# Always build ann_index so production code (Storage, Exprs) can link against
# the ANN infrastructure classes. When DISABLE_ANN is ON the subdirectory
# skips the FAISS/OpenMP sub-libraries and guards the FAISS-specific code
# paths, producing a stub library that satisfies all symbol references.
add_subdirectory(${SRC_DIR}/storage/index/ann)
add_subdirectory(${SRC_DIR}/storage)
add_subdirectory(${SRC_DIR}/runtime)
Expand Down
17 changes: 16 additions & 1 deletion be/src/common/factory_creator.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@
// during inherits
// TODO try to allow make_unique
//

// On macOS, LLVM libc++ (both 18 and 20) has an internal compressed_pair
// static_cast issue when make_shared is used with types that inherit from
// enable_shared_from_this. Using the two-allocation form std::shared_ptr(new T)
// avoids the libc++ bug while still correctly initialising the enable_shared_from_this
// weak pointer (the shared_ptr constructor detects and handles this automatically).
// create_shared is defined inside the class body (via macro expansion), so it can
// access the private operator new of each class.
#ifdef __APPLE__
#define _DORIS_CREATE_SHARED_IMPL(TypeName) \
std::shared_ptr<TypeName>(new TypeName(std::forward<Args>(args)...))
#else
#define _DORIS_CREATE_SHARED_IMPL(TypeName) std::make_shared<TypeName>(std::forward<Args>(args)...)
#endif

#define ENABLE_FACTORY_CREATOR(TypeName) \
private: \
void* operator new(std::size_t size) { \
Expand All @@ -59,7 +74,7 @@ public:
} \
template <typename... Args> \
static std::shared_ptr<TypeName> create_shared(Args&&... args) { \
return std::make_shared<TypeName>(std::forward<Args>(args)...); \
return _DORIS_CREATE_SHARED_IMPL(TypeName); \
} \
template <typename... Args> \
static std::unique_ptr<TypeName> create_unique(Args&&... args) { \
Expand Down
1 change: 1 addition & 0 deletions be/src/core/binary_cast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <bit>
#include <cstdint>
#include <type_traits>

Expand Down
5 changes: 5 additions & 0 deletions be/src/core/value/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,12 @@ class DateV2Value {
requires std::is_integral_v<U>
DateV2Value(U other) = delete;

#ifndef __APPLE__
// Apple Clang 17: a user-declared defaulted copy constructor taking a
// non-const reference (T&) breaks std::is_trivially_copyable even when
// also = default. The const T& overload below is sufficient.
DateV2Value(DateV2Value<T>& other) = default;
#endif

DateV2Value(const DateV2Value<T>& other) = default;

Expand Down
4 changes: 2 additions & 2 deletions be/src/exprs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ set(SRC_FILES ${SRC_FILES}
)

add_library(Exprs STATIC ${SRC_FILES})
# function_array_distance uses faiss headers (platform_macros.h, distances.h),
# which are exported by ann_index via PUBLIC linkage with faiss.
# function_array_distance and vectorized_fn_call use ANN infrastructure types.
# ann_index is always built (with FAISS stubs when DISABLE_ANN=ON) so always link.
target_link_libraries(Exprs PRIVATE ann_index)

pch_reuse(Exprs)
Expand Down
35 changes: 35 additions & 0 deletions be/src/exprs/function/array/function_array_distance.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,43 @@

#pragma once

#ifndef DISABLE_ANN
#include <faiss/impl/platform_macros.h>
#include <faiss/utils/distances.h>
#else
// When FAISS is disabled (e.g. macOS local UT builds), provide inline stubs
// for the FAISS functions used by L1/L2/InnerProduct distance classes and
// no-op definitions for the float-control pragmas.
#include <cmath>
#include <cstddef>
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
namespace faiss {
inline float fvec_L1(const float* x, const float* y, std::size_t d) {
float s = 0;
for (std::size_t i = 0; i < d; ++i) {
s += std::abs(x[i] - y[i]);
}
return s;
}
inline float fvec_L2sqr(const float* x, const float* y, std::size_t d) {
float s = 0;
for (std::size_t i = 0; i < d; ++i) {
float diff = x[i] - y[i];
s += diff * diff;
}
return s;
}
inline float fvec_inner_product(const float* x, const float* y, std::size_t d) {
float s = 0;
for (std::size_t i = 0; i < d; ++i) {
s += x[i] * y[i];
}
return s;
}
} // namespace faiss
#endif

#include <gen_cpp/Types_types.h>

#include <optional>
Expand Down
2 changes: 1 addition & 1 deletion be/src/load/routine_load/kinesis_conf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ Status KinesisConf::apply_to_get_shard_iterator_request(
it = _get_shard_iterator_params.find("timestamp");
if (it != _get_shard_iterator_params.end()) {
try {
request.SetTimestamp(Aws::Utils::DateTime(std::stol(it->second)));
request.SetTimestamp(Aws::Utils::DateTime(static_cast<int64_t>(std::stol(it->second))));
} catch (const std::exception&) {
return Status::InternalError("Failed to apply get_shard_iterator.timestamp: {}",
it->second);
Expand Down
Loading
Loading