Skip to content

Commit

Permalink
GH-42149: [C++] Use FetchContent for bundled ORC (#43011)
Browse files Browse the repository at this point in the history
### Rationale for this change

This change also includes a workaround for ORC-1732 (https://issues.apache.org/jira/browse/ORC-1732).

### What changes are included in this PR?

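ORC 2.0.1 has a dependency detection problem. We can't override its dependency detection when it is built with ExternalProject, but we can when it is built with FetchContent. So the bundled ORC is now built with FetchContent when CMake 3.29 or later is used; older CMake versions keep using ExternalProject.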

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.
* GitHub Issue: #42149

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com>
  • Loading branch information
kou authored Jul 8, 2024
1 parent 38d37b4 commit e8a795b
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 91 deletions.
8 changes: 8 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ if(POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif()

# https://cmake.org/cmake/help/latest/policy/CMP0170.html
#
# Opt in to the NEW behavior of CMP0170 when the running CMake knows
# this policy. With NEW, if FETCHCONTENT_FULLY_DISCONNECTED=ON,
# FetchContent enforces that each dependency's source directory has
# already been populated by the user instead of silently continuing.
#
# The if(POLICY ...) guard keeps CMake versions that don't define
# CMP0170 from failing on an unknown policy.
if(POLICY CMP0170)
cmake_policy(SET CMP0170 NEW)
endif()

set(ARROW_VERSION "17.0.0-SNAPSHOT")

string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
Expand Down
279 changes: 190 additions & 89 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2532,6 +2532,7 @@ macro(build_zlib)
set_property(TARGET ZLIB::ZLIB
PROPERTY IMPORTED_LOCATION
"${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a")
target_include_directories(ZLIB::ZLIB INTERFACE "${EMSCRIPTEN_SYSROOT}/include")
list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
else()
set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
Expand Down Expand Up @@ -4490,116 +4491,216 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include")
# ----------------------------------------------------------------------
# Apache ORC

macro(build_orc)
function(build_orc)
message(STATUS "Building Apache ORC from source")

set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install")
set(ORC_HOME "${ORC_PREFIX}")
set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
set(ORC_STATIC_LIB
"${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}")
# FetchContent-based build of the bundled ORC. This branch is taken
# only with CMake 3.29 or later; older versions use the other branch.
# NOTE(review): the reason for the 3.29 threshold isn't visible here
# -- confirm and document it.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29)
# Declare the ORC source archive. The download is verified against
# the SHA256 checksum in ARROW_ORC_BUILD_SHA256_CHECKSUM.
fetchcontent_declare(orc
${FC_DECLARE_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
# prepare_fetchcontent() is defined elsewhere in this project.
# Presumably it adjusts common build settings for FetchContent
# sub-builds -- TODO confirm.
prepare_fetchcontent()

# Turn unity builds off for everything configured below. This is a
# plain (non-cache) variable, so it doesn't change the cache entry.
set(CMAKE_UNITY_BUILD FALSE)

# The following sections pre-seed the variables that ORC's own
# dependency detection is expected to read, so that ORC uses the
# targets Arrow already resolved. "CACHE ... FORCE" overwrites any
# existing cache entries with the same names.
#
# Each *_HOME is the parent directory of the target's
# INTERFACE_INCLUDE_DIRECTORIES value.
# NOTE(review): this assumes the property holds a single
# "<prefix>/include"-style path -- confirm for all supported targets.

# LZ4: the library is passed as the LZ4::lz4 target name.
set(ORC_PREFER_STATIC_LZ4
OFF
CACHE BOOL "" FORCE)
get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY)
set(LZ4_HOME
${LZ4_ROOT}
CACHE STRING "" FORCE)
set(LZ4_LIBRARY
LZ4::lz4
CACHE STRING "" FORCE)

# Protobuf: PROTOBUF_HOME is a cache entry, but
# PROTOBUF_EXECUTABLE, PROTOBUF_LIBRARY and PROTOC_LIBRARY are
# plain variables holding Arrow's Protobuf target names.
set(ORC_PREFER_STATIC_PROTOBUF
OFF
CACHE BOOL "" FORCE)
get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY)
set(PROTOBUF_HOME
${Protobuf_ROOT}
CACHE STRING "" FORCE)
# ORC uses this.
#
# This adds libprotobuf's include directories to the libprotoc
# target's interface include directories.
target_include_directories(${ARROW_PROTOBUF_LIBPROTOC}
INTERFACE "${PROTOBUF_INCLUDE_DIR}")
set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC})
set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF})
set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC})

# Snappy: the library is passed as the ${Snappy_TARGET} target name.
set(ORC_PREFER_STATIC_SNAPPY
OFF
CACHE BOOL "" FORCE)
get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY)
set(SNAPPY_HOME
${Snappy_ROOT}
CACHE STRING "" FORCE)
set(SNAPPY_LIBRARY
${Snappy_TARGET}
CACHE STRING "" FORCE)

# zlib: the library is passed as the ZLIB::ZLIB target name.
set(ORC_PREFER_STATIC_ZLIB
OFF
CACHE BOOL "" FORCE)
get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY)
set(ZLIB_HOME
${ZLIB_ROOT}
CACHE STRING "" FORCE)
set(ZLIB_LIBRARY
ZLIB::ZLIB
CACHE STRING "" FORCE)

# Zstandard: ZSTD_HOME is a cache entry, but ZSTD_LIBRARY is a
# plain variable holding Arrow's zstd target name.
set(ORC_PREFER_STATIC_ZSTD
OFF
CACHE BOOL "" FORCE)
get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY)
set(ZSTD_HOME
${ZSTD_ROOT}
CACHE STRING "" FORCE)
set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD})

# Disable the optional parts of the ORC build. These are the same
# switches that the ExternalProject-based branch passes as -D...=OFF
# arguments. They are unprefixed cache entries, so FORCE overwrites
# any entries with the same names in the enclosing build.
set(BUILD_CPP_TESTS
OFF
CACHE BOOL "" FORCE)
set(BUILD_JAVA
OFF
CACHE BOOL "" FORCE)
set(BUILD_LIBHDFSPP
OFF
CACHE BOOL "" FORCE)
set(BUILD_TOOLS
OFF
CACHE BOOL "" FORCE)
set(INSTALL_VENDORED_LIBS
OFF
CACHE BOOL "" FORCE)
set(STOP_BUILD_ON_WARNING
OFF
CACHE BOOL "" FORCE)

# We can remove this with ORC 2.0.2 or later.
#
# Put ORC's cmake_modules directory first in the module search
# path. The path is presumably where FetchContent unpacks the ORC
# sources -- TODO confirm that it matches ${orc_SOURCE_DIR}.
list(PREPEND CMAKE_MODULE_PATH
${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules)

# Populate the ORC sources and add them to this build. All of the
# variables above must be set before this call to take effect.
fetchcontent_makeavailable(orc)

# Expose the ORC target built above as orc::orc. The wrapper adds
# the include directories from ORC's binary and source trees.
add_library(orc::orc INTERFACE IMPORTED)
target_link_libraries(orc::orc INTERFACE orc)
target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include"
"${orc_SOURCE_DIR}/c++/include")

# Record the underlying "orc" target (not the orc::orc wrapper) as
# a bundled static library.
list(APPEND ARROW_BUNDLED_STATIC_LIBS orc)
else()
set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install")
set(ORC_HOME "${ORC_PREFIX}")
set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
set(ORC_STATIC_LIB
"${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}"
)

get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY)
get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY)

get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)
get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)

get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)
get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)

get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)

set(ORC_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
-DSTOP_BUILD_ON_WARNING=OFF
-DBUILD_LIBHDFSPP=OFF
-DBUILD_JAVA=OFF
-DBUILD_TOOLS=OFF
-DBUILD_CPP_TESTS=OFF
-DINSTALL_VENDORED_LIBS=OFF
"-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
"-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
"-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
"-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>"
"-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
"-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
"-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
"-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
"-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
"-DZSTD_HOME=${ORC_ZSTD_ROOT}"
"-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
"-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
if(ZLIB_ROOT)
set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
endif()
set(ORC_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
-DSTOP_BUILD_ON_WARNING=OFF
-DBUILD_LIBHDFSPP=OFF
-DBUILD_JAVA=OFF
-DBUILD_TOOLS=OFF
-DBUILD_CPP_TESTS=OFF
-DINSTALL_VENDORED_LIBS=OFF
"-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
"-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
"-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
"-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>"
"-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
"-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
"-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
"-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
"-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
"-DZSTD_HOME=${ORC_ZSTD_ROOT}"
"-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
"-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
if(ZLIB_ROOT)
set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
endif()

# Work around CMake bug
file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})
# Work around CMake bug
file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})

externalproject_add(orc_ep
${EP_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_PROTOBUF_PROTOC}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)

set(ORC_VENDORED 1)

add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET})
# Protobuf generated files may use ABSL_DCHECK*() and
# absl::log_internal_check_op is needed for them.
if(TARGET absl::log_internal_check_op)
target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
endif()
if(NOT MSVC)
if(NOT APPLE AND ARROW_ENABLE_THREADING)
target_link_libraries(orc::orc INTERFACE Threads::Threads)
endif()
target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9")
target_link_libraries(orc::orc INTERFACE stdc++fs)
externalproject_add(orc_ep
${EP_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_PROTOBUF_PROTOC}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)
add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET})
# Protobuf generated files may use ABSL_DCHECK*() and
# absl::log_internal_check_op is needed for them.
if(TARGET absl::log_internal_check_op)
target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8")
target_link_libraries(orc::orc INTERFACE c++fs)
if(NOT MSVC)
if(NOT APPLE AND ARROW_ENABLE_THREADING)
target_link_libraries(orc::orc INTERFACE Threads::Threads)
endif()
target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
endif()
target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
add_dependencies(orc::orc orc_ep)
list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
endif()

add_dependencies(orc::orc orc_ep)

list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
endmacro()
set(ORC_VENDORED
TRUE
PARENT_SCOPE)
set(ARROW_BUNDLED_STATIC_LIBS
${ARROW_BUNDLED_STATIC_LIBS}
PARENT_SCOPE)
endfunction()

if(ARROW_ORC)
resolve_dependency(orc HAVE_ALT TRUE)
target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
if(ORC_VENDORED)
set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION})
else()
target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
set(ARROW_ORC_VERSION ${orcAlt_VERSION})
message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
endif()
message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
endif()

# ----------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions cpp/thirdparty/versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0
ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706
ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0
ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412
ARROW_ORC_BUILD_VERSION=2.0.0
ARROW_ORC_BUILD_SHA256_CHECKSUM=9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df
ARROW_ORC_BUILD_VERSION=2.0.1
ARROW_ORC_BUILD_SHA256_CHECKSUM=1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d
ARROW_PROTOBUF_BUILD_VERSION=v21.3
ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
# Because of https://github.com/Tencent/rapidjson/pull/1323, we require
Expand Down
1 change: 1 addition & 0 deletions dev/tasks/linux-packages/apache-arrow/debian/rules
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ override_dh_auto_configure:
-DARROW_WITH_ZSTD=ON \
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
-DCUDAToolkit_ROOT=/usr \
-DFETCHCONTENT_FULLY_DISCONNECTED=OFF \
-DPARQUET_BUILD_EXECUTABLES=ON \
-DPARQUET_REQUIRE_ENCRYPTION=ON

Expand Down

0 comments on commit e8a795b

Please sign in to comment.