From de04fca449290d2201ccf19b65010ebad54a8388 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 23 Sep 2022 17:17:08 +0900 Subject: [PATCH 1/4] ARROW-6858: [C++] Simplify transitive build option dependencies This approach adds "depends" information to each (bool) build options and use it to resolve transitive build option dependencies automatically. This approach implements topological sort in CMake to resolve transitive dependencies. Another approach proposed in the associated Jira issue: It creates a Python script that generates a CMake code (.cmake file) that handles transitive dependencies. Dependencies information are written in the Python script. I think that this approach is better than the another approach because: * We can put option definitions and their dependencies into the same place. (We don't need to put them into .cmake and .py.) * We don't need to regenerate a .cmake file when we update option dependencies. * We can specify dependencies information with a simple way. (We can just add "DEPENDS ARROW_XXX ARROW_YYY ..." to an option defintion.) Here are downsides of this approach: * We need to maintain topological sort implementation in CMake. Because CMake doesn't provide a topological sort feature that is used in CMake internally. But topological sort algorithm is well-known (Tarjan's algorithm is published at 1976) and it's implementation in this approach has only 20+ CMake lines. I think that we can maintain it. * This can't support complex conditions such as "ARROW_X AND NOT ARROW_Y". But we don't have any complex condition for now. --- ci/docker/conda-integration.dockerfile | 1 + cpp/CMakeLists.txt | 102 ++------------- cpp/CMakePresets.json | 23 +--- cpp/cmake_modules/DefineOptions.cmake | 164 +++++++++++++++++++++---- 4 files changed, 155 insertions(+), 135 deletions(-) diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 558f6bbf4dc13..a455ce381e91d 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -63,6 +63,7 @@ ENV ARROW_BUILD_INTEGRATION=ON \ ARROW_DATASET=OFF \ ARROW_FILESYSTEM=OFF \ ARROW_FLIGHT=ON \ + ARROW_FLIGHT_SQL=ON \ ARROW_GANDIVA=OFF \ ARROW_HDFS=OFF \ ARROW_JEMALLOC=OFF \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 34582f6f07206..01d461d14a250 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -167,16 +167,6 @@ if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" set(CMAKE_EXPORT_COMPILE_COMMANDS 1) endif() -# ---------------------------------------------------------------------- -# cmake options -include(DefineOptions) - -if(ARROW_BUILD_SHARED AND NOT ARROW_POSITION_INDEPENDENT_CODE) - message(WARNING "Can't disable position-independent code to build shared libraries, enabling" - ) - set(ARROW_POSITION_INDEPENDENT_CODE ON) -endif() - # Needed for linting targets, etc. if(${CMAKE_VERSION} VERSION_LESS "3.12.0") find_package(PythonInterp) @@ -192,6 +182,16 @@ else() set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) endif() +# ---------------------------------------------------------------------- +# cmake options +include(DefineOptions) + +if(ARROW_BUILD_SHARED AND NOT ARROW_POSITION_INDEPENDENT_CODE) + message(WARNING "Can't disable position-independent code to build shared libraries, enabling" + ) + set(ARROW_POSITION_INDEPENDENT_CODE ON) +endif() + if(ARROW_USE_SCCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER) @@ -350,88 +350,6 @@ if(UNIX) add_custom_target(iwyu-all ${BUILD_SUPPORT_DIR}/iwyu/iwyu.sh all) endif(UNIX) -# -# Set up various options -# - -if(ARROW_BUILD_BENCHMARKS - OR ARROW_BUILD_TESTS - OR ARROW_BUILD_INTEGRATION - OR ARROW_FUZZING) - set(ARROW_TESTING ON) -endif() - -if(ARROW_GANDIVA) - set(ARROW_WITH_RE2 ON) - set(ARROW_WITH_UTF8PROC ON) -endif() - -if(ARROW_BUILD_INTEGRATION AND ARROW_FLIGHT) - set(ARROW_FLIGHT_SQL ON) -endif() - -if(ARROW_FLIGHT_SQL) - set(ARROW_FLIGHT ON) -endif() - -if(ARROW_CUDA - OR ARROW_FLIGHT - OR ARROW_PARQUET - OR ARROW_BUILD_TESTS - OR ARROW_BUILD_BENCHMARKS) - set(ARROW_IPC ON) -endif() - -if(ARROW_SUBSTRAIT) - set(ARROW_PARQUET ON) - set(ARROW_IPC ON) - set(ARROW_COMPUTE ON) - set(ARROW_DATASET ON) -endif() - -if(ARROW_SKYHOOK) - set(ARROW_DATASET ON) - set(ARROW_PARQUET ON) - set(ARROW_WITH_LZ4 ON) - set(ARROW_WITH_SNAPPY ON) -endif() - -if(ARROW_TESTING) - set(ARROW_JSON ON) -endif() - -if(ARROW_DATASET) - set(ARROW_COMPUTE ON) - set(ARROW_FILESYSTEM ON) -endif() - -if(ARROW_PARQUET) - set(ARROW_COMPUTE ON) -endif() - -if(ARROW_PYTHON) - set(ARROW_COMPUTE ON) - set(ARROW_CSV ON) - set(ARROW_DATASET ON) - set(ARROW_FILESYSTEM ON) - set(ARROW_HDFS ON) - set(ARROW_JSON ON) -endif() - -if(MSVC_TOOLCHAIN) - # ORC doesn't build on windows - set(ARROW_ORC OFF) - # Plasma using glog is not fully tested on windows. - set(ARROW_USE_GLOG OFF) -endif() - -if(ARROW_ORC) - set(ARROW_WITH_LZ4 ON) - set(ARROW_WITH_SNAPPY ON) - set(ARROW_WITH_ZLIB ON) - set(ARROW_WITH_ZSTD ON) -endif() - # datetime code used by iOS requires zlib support if(IOS) set(ARROW_WITH_ZLIB ON) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 46eef60002484..a7cdb07f3e24a 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -117,14 +117,6 @@ "ARROW_GANDIVA": "ON" } }, - { - "name": "features-python", - "inherits": "features-main", - "hidden": true, - "cacheVariables": { - "ARROW_PYTHON": "ON" - } - }, { "name": "features-maximal", "inherits": [ @@ -132,8 +124,7 @@ "features-cuda", "features-filesystems", "features-flight", - "features-gandiva", - "features-python" + "features-gandiva" ], "hidden": true, "displayName": "Debug build with everything enabled (except benchmarks and CUDA)", @@ -194,12 +185,6 @@ "displayName": "Debug build with tests and Gandiva", "cacheVariables": {} }, - { - "name": "ninja-debug-python", - "inherits": ["base-debug", "features-python"], - "displayName": "Debug build with tests and Python support", - "cacheVariables": {} - }, { "name": "ninja-debug-maximal", "inherits": ["base-debug", "features-maximal"], @@ -243,12 +228,6 @@ "displayName": "Release build with Gandiva", "cacheVariables": {} }, - { - "name": "ninja-release-python", - "inherits": ["base-release", "features-python"], - "displayName": "Release build with Python support", - "cacheVariables": {} - }, { "name": "ninja-release-maximal", "inherits": ["base-release", "features-maximal"], diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 6b77e36db7731..9f43c9fa6f94e 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -50,6 +50,18 @@ function(list_join lst glue out) endfunction() macro(define_option name description default) + set(options) + set(one_value_args) + set(multi_value_args DEPENDS) + cmake_parse_arguments(ARG + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN}) + if(ARG_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") + endif() + check_description_length(${name} ${description}) list_join(description "\n" multiline_description) @@ -59,6 +71,7 @@ macro(define_option name description default) set("${name}_OPTION_DESCRIPTION" ${description}) set("${name}_OPTION_DEFAULT" ${default}) set("${name}_OPTION_TYPE" "bool") + set("${name}_OPTION_DEPENDS" ${ARG_DEPENDS}) endmacro() macro(define_option_string name description default) @@ -81,6 +94,48 @@ macro(define_option_string name description default) endif() endmacro() +# Topological sort by Tarjan's algorithm. +set(ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED) +macro(tsort_bool_option_dependencies_visit option_name) + if("${${option_name}_TSORT_STATUS}" STREQUAL "VISITING") + message(FATAL_ERROR "Cycled option dependency is detected: ${option_name}") + elseif("${${option_name}_TSORT_STATUS}" STREQUAL "") + set(${option_name}_TSORT_STATUS "VISITING") + foreach(needed_option_name ${${option_name}_OPTION_DEPENDS}) + tsort_bool_option_dependencies_visit(${needed_option_name}) + endforeach() + set(${option_name}_TSORT_STATUS "VISITED") + list(INSERT ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED 0 ${option_name}) + endif() +endmacro() +macro(tsort_bool_option_dependencies) + foreach(category ${ARROW_OPTION_CATEGORIES}) + foreach(option_name ${ARROW_${category}_OPTION_NAMES}) + if("${${option_name}_OPTION_TYPE}" STREQUAL "bool") + if("${${option_name}_TSORT_STATUS}" STREQUAL "") + tsort_bool_option_dependencies_visit(${option_name}) + endif() + endif() + endforeach() + endforeach() +endmacro() + +macro(resolve_option_dependencies) + if(MSVC_TOOLCHAIN) + # Plasma using glog is not fully tested on windows. + set(ARROW_USE_GLOG OFF) + endif() + + tsort_bool_option_dependencies() + foreach(option_name ${ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED}) + foreach(needed_option_name ${${option_name}_OPTION_DEPENDS}) + if(${${option_name}}) + set(${needed_option_name} ON) + endif() + endforeach() + endforeach() +endmacro() + # Top level cmake dir if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") set(ARROW_DEFINE_OPTIONS_DEFAULT ON) @@ -168,14 +223,27 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_BUILD_EXAMPLES "Build the Arrow examples" OFF) - define_option(ARROW_BUILD_TESTS "Build the Arrow googletest unit tests" OFF) + define_option(ARROW_BUILD_TESTS + "Build the Arrow googletest unit tests" + OFF + DEPENDS + ARROW_IPC + ARROW_TESTING) define_option(ARROW_ENABLE_TIMING_TESTS "Enable timing-sensitive tests" ON) - define_option(ARROW_BUILD_INTEGRATION "Build the Arrow integration test executables" - OFF) + define_option(ARROW_BUILD_INTEGRATION + "Build the Arrow integration test executables" + OFF + DEPENDS + ARROW_TESTING) - define_option(ARROW_BUILD_BENCHMARKS "Build the Arrow micro benchmarks" OFF) + define_option(ARROW_BUILD_BENCHMARKS + "Build the Arrow micro benchmarks" + OFF + DEPENDS + ARROW_IPC + ARROW_TESTING) # Reference benchmarks are used to compare to naive implementation, or # discover various hardware limits. @@ -200,7 +268,11 @@ takes precedence over ccache if a storage backend is configured" ON) "shared" "static") - define_option(ARROW_FUZZING "Build Arrow Fuzzing executables" OFF) + define_option(ARROW_FUZZING + "Build Arrow Fuzzing executables" + OFF + DEPENDS + ARROW_TESTING) define_option(ARROW_LARGE_MEMORY_TESTS "Enable unit tests which use large memory" OFF) @@ -235,18 +307,39 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_CSV "Build the Arrow CSV Parser Module" OFF) - define_option(ARROW_CUDA "Build the Arrow CUDA extensions (requires CUDA toolkit)" OFF) + define_option(ARROW_CUDA + "Build the Arrow CUDA extensions (requires CUDA toolkit)" + OFF + DEPENDS + ARROW_IPC) - define_option(ARROW_DATASET "Build the Arrow Dataset Modules" OFF) + define_option(ARROW_DATASET + "Build the Arrow Dataset Modules" + OFF + DEPENDS + ARROW_COMPUTE + ARROW_FILESYSTEM) define_option(ARROW_FILESYSTEM "Build the Arrow Filesystem Layer" OFF) define_option(ARROW_FLIGHT - "Build the Arrow Flight RPC System (requires GRPC, Protocol Buffers)" OFF) - - define_option(ARROW_FLIGHT_SQL "Build the Arrow Flight SQL extension" OFF) - - define_option(ARROW_GANDIVA "Build the Gandiva libraries" OFF) + "Build the Arrow Flight RPC System (requires GRPC, Protocol Buffers)" + OFF + DEPENDS + ARROW_IPC) + + define_option(ARROW_FLIGHT_SQL + "Build the Arrow Flight SQL extension" + OFF + DEPENDS + ARROW_FLIGHT) + + define_option(ARROW_GANDIVA + "Build the Gandiva libraries" + OFF + DEPENDS + ARROW_WITH_RE2 + ARROW_WITH_UTF8PROC) define_option(ARROW_GCS "Build Arrow with GCS support (requires the GCloud SDK for C++)" OFF) @@ -271,23 +364,50 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" OFF) - define_option(ARROW_PARQUET "Build the Parquet libraries" OFF) - - define_option(ARROW_ORC "Build the Arrow ORC adapter" OFF) + define_option(ARROW_PARQUET + "Build the Parquet libraries" + OFF + DEPENDS + ARROW_COMPUTE + ARROW_IPC) + + define_option(ARROW_ORC + "Build the Arrow ORC adapter" + OFF + DEPENDS + ARROW_WITH_LZ4 + ARROW_WITH_SNAPPY + ARROW_WITH_ZLIB + ARROW_WITH_ZSTD) define_option(ARROW_PLASMA "Build the plasma object store along with Arrow" OFF) - define_option(ARROW_PYTHON "Build the Arrow CPython extensions" OFF) - define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF) - define_option(ARROW_SKYHOOK "Build the Skyhook libraries" OFF) - - define_option(ARROW_SUBSTRAIT "Build the Arrow Substrait Consumer Module" OFF) + define_option(ARROW_SKYHOOK + "Build the Skyhook libraries" + OFF + DEPENDS + ARROW_DATASET + ARROW_PARQUET + ARROW_WITH_LZ4 + ARROW_WITH_SNAPPY) + + define_option(ARROW_SUBSTRAIT + "Build the Arrow Substrait Consumer Module" + OFF + DEPENDS + ARROW_DATASET + ARROW_IPC + ARROW_PARQUET) define_option(ARROW_TENSORFLOW "Build Arrow with TensorFlow support enabled" OFF) - define_option(ARROW_TESTING "Build the Arrow testing libraries" OFF) + define_option(ARROW_TESTING + "Build the Arrow testing libraries" + OFF + DEPENDS + ARROW_JSON) #---------------------------------------------------------------------- set_option_category("Thirdparty toolchain") @@ -489,6 +609,8 @@ that have not been built" option(ARROW_BUILD_CONFIG_SUMMARY_JSON "Summarize build configuration in a JSON file" ON) + + resolve_option_dependencies() endif() macro(validate_config) From b8ccd84a03acbfa4855c3463084664d149632f7f Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 27 Sep 2022 06:02:41 +0900 Subject: [PATCH 2/4] Fix wording Co-authored-by: Antoine Pitrou --- cpp/cmake_modules/DefineOptions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 9f43c9fa6f94e..009741a709acc 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -98,7 +98,7 @@ endmacro() set(ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED) macro(tsort_bool_option_dependencies_visit option_name) if("${${option_name}_TSORT_STATUS}" STREQUAL "VISITING") - message(FATAL_ERROR "Cycled option dependency is detected: ${option_name}") + message(FATAL_ERROR "Cyclic option dependency is detected: ${option_name}") elseif("${${option_name}_TSORT_STATUS}" STREQUAL "") set(${option_name}_TSORT_STATUS "VISITING") foreach(needed_option_name ${${option_name}_OPTION_DEPENDS}) From fdfa168139cc07a0e996324bbb5703f9d17581fb Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 27 Sep 2022 06:11:35 +0900 Subject: [PATCH 3/4] Fix if position --- cpp/cmake_modules/DefineOptions.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 009741a709acc..9ba06ecd90570 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -128,9 +128,9 @@ macro(resolve_option_dependencies) tsort_bool_option_dependencies() foreach(option_name ${ARROW_BOOL_OPTION_DEPENDENCIES_TSORTED}) - foreach(needed_option_name ${${option_name}_OPTION_DEPENDS}) - if(${${option_name}}) - set(${needed_option_name} ON) + if(${${option_name}}) + foreach(depended_option_name ${${option_name}_OPTION_DEPENDS}) + set(${depended_option_name} ON) endif() endforeach() endforeach() From 2cd791d6c05d79c1784c5647083437dabd83b9d6 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 27 Sep 2022 09:02:38 +0900 Subject: [PATCH 4/4] Fix end mark --- cpp/cmake_modules/DefineOptions.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 9ba06ecd90570..d5ebf18d56f8e 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -131,8 +131,8 @@ macro(resolve_option_dependencies) if(${${option_name}}) foreach(depended_option_name ${${option_name}_OPTION_DEPENDS}) set(${depended_option_name} ON) - endif() - endforeach() + endforeach() + endif() endforeach() endmacro()