diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 558f6bbf4dc13..a455ce381e91d 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -63,6 +63,7 @@ ENV ARROW_BUILD_INTEGRATION=ON \ ARROW_DATASET=OFF \ ARROW_FILESYSTEM=OFF \ ARROW_FLIGHT=ON \ + ARROW_FLIGHT_SQL=ON \ ARROW_GANDIVA=OFF \ ARROW_HDFS=OFF \ ARROW_JEMALLOC=OFF \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 34582f6f07206..01d461d14a250 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -167,16 +167,6 @@ if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" set(CMAKE_EXPORT_COMPILE_COMMANDS 1) endif() -# ---------------------------------------------------------------------- -# cmake options -include(DefineOptions) - -if(ARROW_BUILD_SHARED AND NOT ARROW_POSITION_INDEPENDENT_CODE) - message(WARNING "Can't disable position-independent code to build shared libraries, enabling" - ) - set(ARROW_POSITION_INDEPENDENT_CODE ON) -endif() - # Needed for linting targets, etc. if(${CMAKE_VERSION} VERSION_LESS "3.12.0") find_package(PythonInterp) @@ -192,6 +182,16 @@ else() set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) endif() +# ---------------------------------------------------------------------- +# cmake options +include(DefineOptions) + +if(ARROW_BUILD_SHARED AND NOT ARROW_POSITION_INDEPENDENT_CODE) + message(WARNING "Can't disable position-independent code to build shared libraries, enabling" + ) + set(ARROW_POSITION_INDEPENDENT_CODE ON) +endif() + if(ARROW_USE_SCCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER) @@ -350,88 +350,6 @@ if(UNIX) add_custom_target(iwyu-all ${BUILD_SUPPORT_DIR}/iwyu/iwyu.sh all) endif(UNIX) -# -# Set up various options -# - -if(ARROW_BUILD_BENCHMARKS - OR ARROW_BUILD_TESTS - OR ARROW_BUILD_INTEGRATION - OR ARROW_FUZZING) - set(ARROW_TESTING ON) -endif() - -if(ARROW_GANDIVA) - set(ARROW_WITH_RE2 ON) - set(ARROW_WITH_UTF8PROC ON) -endif() - -if(ARROW_BUILD_INTEGRATION AND ARROW_FLIGHT) - set(ARROW_FLIGHT_SQL ON) -endif() - -if(ARROW_FLIGHT_SQL) - set(ARROW_FLIGHT ON) -endif() - -if(ARROW_CUDA - OR ARROW_FLIGHT - OR ARROW_PARQUET - OR ARROW_BUILD_TESTS - OR ARROW_BUILD_BENCHMARKS) - set(ARROW_IPC ON) -endif() - -if(ARROW_SUBSTRAIT) - set(ARROW_PARQUET ON) - set(ARROW_IPC ON) - set(ARROW_COMPUTE ON) - set(ARROW_DATASET ON) -endif() - -if(ARROW_SKYHOOK) - set(ARROW_DATASET ON) - set(ARROW_PARQUET ON) - set(ARROW_WITH_LZ4 ON) - set(ARROW_WITH_SNAPPY ON) -endif() - -if(ARROW_TESTING) - set(ARROW_JSON ON) -endif() - -if(ARROW_DATASET) - set(ARROW_COMPUTE ON) - set(ARROW_FILESYSTEM ON) -endif() - -if(ARROW_PARQUET) - set(ARROW_COMPUTE ON) -endif() - -if(ARROW_PYTHON) - set(ARROW_COMPUTE ON) - set(ARROW_CSV ON) - set(ARROW_DATASET ON) - set(ARROW_FILESYSTEM ON) - set(ARROW_HDFS ON) - set(ARROW_JSON ON) -endif() - -if(MSVC_TOOLCHAIN) - # ORC doesn't build on windows - set(ARROW_ORC OFF) - # Plasma using glog is not fully tested on windows. - set(ARROW_USE_GLOG OFF) -endif() - -if(ARROW_ORC) - set(ARROW_WITH_LZ4 ON) - set(ARROW_WITH_SNAPPY ON) - set(ARROW_WITH_ZLIB ON) - set(ARROW_WITH_ZSTD ON) -endif() - # datetime code used by iOS requires zlib support if(IOS) set(ARROW_WITH_ZLIB ON) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 46eef60002484..a7cdb07f3e24a 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -117,14 +117,6 @@ "ARROW_GANDIVA": "ON" } }, - { - "name": "features-python", - "inherits": "features-main", - "hidden": true, - "cacheVariables": { - "ARROW_PYTHON": "ON" - } - }, { "name": "features-maximal", "inherits": [ @@ -132,8 +124,7 @@ "features-cuda", "features-filesystems", "features-flight", - "features-gandiva", - "features-python" + "features-gandiva" ], "hidden": true, "displayName": "Debug build with everything enabled (except benchmarks and CUDA)", @@ -194,12 +185,6 @@ "displayName": "Debug build with tests and Gandiva", "cacheVariables": {} }, - { - "name": "ninja-debug-python", - "inherits": ["base-debug", "features-python"], - "displayName": "Debug build with tests and Python support", - "cacheVariables": {} - }, { "name": "ninja-debug-maximal", "inherits": ["base-debug", "features-maximal"], @@ -243,12 +228,6 @@ "displayName": "Release build with Gandiva", "cacheVariables": {} }, - { - "name": "ninja-release-python", - "inherits": ["base-release", "features-python"], - "displayName": "Release build with Python support", - "cacheVariables": {} - }, { "name": "ninja-release-maximal", "inherits": ["base-release", "features-maximal"], diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 6b77e36db7731..d5ebf18d56f8e 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -50,6 +50,18 @@ function(list_join lst glue out) endfunction() macro(define_option name description default) + set(options) + set(one_value_args) + set(multi_value_args DEPENDS) + cmake_parse_arguments(ARG + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN}) + if(ARG_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") + endif() + check_description_length(${name} ${description}) list_join(description "\n" multiline_description) @@ -59,6 +71,7 @@ macro(define_option name description default) set("${name}_OPTION_DESCRIPTION" ${description}) set("${name}_OPTION_DEFAULT" ${default}) set("${name}_OPTION_TYPE" "bool") + set("${name}_OPTION_DEPENDS" ${ARG_DEPENDS}) endmacro() macro(define_option_string name description default) @@ -81,6 +94,48 @@ macro(define_option_string name description default) endif() endmacro() +# Topological sort by Tarjan's algorithm. +set(ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED) +macro(tsort_bool_option_dependencies_visit option_name) + if("${${option_name}_TSORT_STATUS}" STREQUAL "VISITING") + message(FATAL_ERROR "Cyclic option dependency is detected: ${option_name}") + elseif("${${option_name}_TSORT_STATUS}" STREQUAL "") + set(${option_name}_TSORT_STATUS "VISITING") + foreach(needed_option_name ${${option_name}_OPTION_DEPENDS}) + tsort_bool_option_dependencies_visit(${needed_option_name}) + endforeach() + set(${option_name}_TSORT_STATUS "VISITED") + list(INSERT ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED 0 ${option_name}) + endif() +endmacro() +macro(tsort_bool_option_dependencies) + foreach(category ${ARROW_OPTION_CATEGORIES}) + foreach(option_name ${ARROW_${category}_OPTION_NAMES}) + if("${${option_name}_OPTION_TYPE}" STREQUAL "bool") + if("${${option_name}_TSORT_STATUS}" STREQUAL "") + tsort_bool_option_dependencies_visit(${option_name}) + endif() + endif() + endforeach() + endforeach() +endmacro() + +macro(resolve_option_dependencies) + if(MSVC_TOOLCHAIN) + # Plasma using glog is not fully tested on windows. + set(ARROW_USE_GLOG OFF) + endif() + + tsort_bool_option_dependencies() + foreach(option_name ${ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED}) + if(${${option_name}}) + foreach(depended_option_name ${${option_name}_OPTION_DEPENDS}) + set(${depended_option_name} ON) + endforeach() + endif() + endforeach() +endmacro() + # Top level cmake dir if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") set(ARROW_DEFINE_OPTIONS_DEFAULT ON) @@ -168,14 +223,27 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_BUILD_EXAMPLES "Build the Arrow examples" OFF) - define_option(ARROW_BUILD_TESTS "Build the Arrow googletest unit tests" OFF) + define_option(ARROW_BUILD_TESTS + "Build the Arrow googletest unit tests" + OFF + DEPENDS + ARROW_IPC + ARROW_TESTING) define_option(ARROW_ENABLE_TIMING_TESTS "Enable timing-sensitive tests" ON) - define_option(ARROW_BUILD_INTEGRATION "Build the Arrow integration test executables" - OFF) + define_option(ARROW_BUILD_INTEGRATION + "Build the Arrow integration test executables" + OFF + DEPENDS + ARROW_TESTING) - define_option(ARROW_BUILD_BENCHMARKS "Build the Arrow micro benchmarks" OFF) + define_option(ARROW_BUILD_BENCHMARKS + "Build the Arrow micro benchmarks" + OFF + DEPENDS + ARROW_IPC + ARROW_TESTING) # Reference benchmarks are used to compare to naive implementation, or # discover various hardware limits. @@ -200,7 +268,11 @@ takes precedence over ccache if a storage backend is configured" ON) "shared" "static") - define_option(ARROW_FUZZING "Build Arrow Fuzzing executables" OFF) + define_option(ARROW_FUZZING + "Build Arrow Fuzzing executables" + OFF + DEPENDS + ARROW_TESTING) define_option(ARROW_LARGE_MEMORY_TESTS "Enable unit tests which use large memory" OFF) @@ -235,18 +307,39 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_CSV "Build the Arrow CSV Parser Module" OFF) - define_option(ARROW_CUDA "Build the Arrow CUDA extensions (requires CUDA toolkit)" OFF) + define_option(ARROW_CUDA + "Build the Arrow CUDA extensions (requires CUDA toolkit)" + OFF + DEPENDS + ARROW_IPC) - define_option(ARROW_DATASET "Build the Arrow Dataset Modules" OFF) + define_option(ARROW_DATASET + "Build the Arrow Dataset Modules" + OFF + DEPENDS + ARROW_COMPUTE + ARROW_FILESYSTEM) define_option(ARROW_FILESYSTEM "Build the Arrow Filesystem Layer" OFF) define_option(ARROW_FLIGHT - "Build the Arrow Flight RPC System (requires GRPC, Protocol Buffers)" OFF) - - define_option(ARROW_FLIGHT_SQL "Build the Arrow Flight SQL extension" OFF) - - define_option(ARROW_GANDIVA "Build the Gandiva libraries" OFF) + "Build the Arrow Flight RPC System (requires GRPC, Protocol Buffers)" + OFF + DEPENDS + ARROW_IPC) + + define_option(ARROW_FLIGHT_SQL + "Build the Arrow Flight SQL extension" + OFF + DEPENDS + ARROW_FLIGHT) + + define_option(ARROW_GANDIVA + "Build the Gandiva libraries" + OFF + DEPENDS + ARROW_WITH_RE2 + ARROW_WITH_UTF8PROC) define_option(ARROW_GCS "Build Arrow with GCS support (requires the GCloud SDK for C++)" OFF) @@ -271,23 +364,50 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" OFF) - define_option(ARROW_PARQUET "Build the Parquet libraries" OFF) - - define_option(ARROW_ORC "Build the Arrow ORC adapter" OFF) + define_option(ARROW_PARQUET + "Build the Parquet libraries" + OFF + DEPENDS + ARROW_COMPUTE + ARROW_IPC) + + define_option(ARROW_ORC + "Build the Arrow ORC adapter" + OFF + DEPENDS + ARROW_WITH_LZ4 + ARROW_WITH_SNAPPY + ARROW_WITH_ZLIB + ARROW_WITH_ZSTD) define_option(ARROW_PLASMA "Build the plasma object store along with Arrow" OFF) - define_option(ARROW_PYTHON "Build the Arrow CPython extensions" OFF) - define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF) - define_option(ARROW_SKYHOOK "Build the Skyhook libraries" OFF) - - define_option(ARROW_SUBSTRAIT "Build the Arrow Substrait Consumer Module" OFF) + define_option(ARROW_SKYHOOK + "Build the Skyhook libraries" + OFF + DEPENDS + ARROW_DATASET + ARROW_PARQUET + ARROW_WITH_LZ4 + ARROW_WITH_SNAPPY) + + define_option(ARROW_SUBSTRAIT + "Build the Arrow Substrait Consumer Module" + OFF + DEPENDS + ARROW_DATASET + ARROW_IPC + ARROW_PARQUET) define_option(ARROW_TENSORFLOW "Build Arrow with TensorFlow support enabled" OFF) - define_option(ARROW_TESTING "Build the Arrow testing libraries" OFF) + define_option(ARROW_TESTING + "Build the Arrow testing libraries" + OFF + DEPENDS + ARROW_JSON) #---------------------------------------------------------------------- set_option_category("Thirdparty toolchain") @@ -489,6 +609,8 @@ that have not been built" option(ARROW_BUILD_CONFIG_SUMMARY_JSON "Summarize build configuration in a JSON file" ON) + + resolve_option_dependencies() endif() macro(validate_config)