Skip to content

Commit

Permalink
GH-34615: [CI][C++] Add CI job for basic format support without ARROW_COMPUTE (#34617)
Browse files Browse the repository at this point in the history

This adds a Crossbow job that builds and tests Arrow with `ARROW_IPC`, `ARROW_PARQUET`, and `ARROW_CSV`, based on a minimal Ubuntu image.

The job primarily aims to test the core Arrow library plus basic format support without the full kernel registry provided by `ARROW_COMPUTE`. Note that `ARROW_JSON` is implicitly enabled as well, because `ARROW_TESTING` depends on it.

* Closes: #34615
* Closes: #34655 

Authored-by: benibus <bpharks@gmx.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
benibus committed Mar 23, 2023
1 parent 3600bd8 commit 4487be0
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 92 deletions.
1 change: 1 addition & 0 deletions ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
tzdata \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
tzdata \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
Expand Down
47 changes: 26 additions & 21 deletions cpp/src/arrow/compute/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ arrow_add_pkg_config("arrow-compute")
# Unit tests
#

# The following kernels are always present:
set(ARROW_COMPUTE_TEST_PREFIX "arrow-compute")
set(ARROW_COMPUTE_TEST_LABELS "arrow_compute")
set(ARROW_COMPUTE_TEST_ARGS PREFIX ${ARROW_COMPUTE_TEST_PREFIX} LABELS
${ARROW_COMPUTE_TEST_LABELS})

# add_arrow_compute_test() only adds the test if ARROW_COMPUTE is enabled,
# meaning the full kernel registry is available.
#
# The following kernels are always present in default builds:
# - array_filter
# - array_take
# - cast
Expand All @@ -38,10 +45,12 @@ arrow_add_pkg_config("arrow-compute")
# - unique
# - value_counts
#
# Tests that use additional kernels should specify REQUIRE_ALL_KERNELS to avoid
# being included in minimal builds. See: GH-34388
# Also see: GH-34388, GH-34615
function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
set(options REQUIRE_ALL_KERNELS)
if(NOT ARROW_COMPUTE)
return()
endif()

set(one_value_args PREFIX)
set(multi_value_args LABELS)
cmake_parse_arguments(ARG
Expand All @@ -50,41 +59,37 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
"${multi_value_args}"
${ARGN})

if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE))
return()
endif()

if(ARG_PREFIX)
set(PREFIX ${ARG_PREFIX})
else()
set(PREFIX "arrow-compute")
set(PREFIX ${ARROW_COMPUTE_TEST_PREFIX})
endif()

if(ARG_LABELS)
set(LABELS ${ARG_LABELS})
else()
set(LABELS "arrow_compute")
set(LABELS ${ARROW_COMPUTE_TEST_LABELS})
endif()

add_arrow_test(${REL_TEST_NAME}
EXTRA_LINK_LIBS
${ARROW_DATASET_TEST_LINK_LIBS}
PREFIX
${PREFIX}
LABELS
${LABELS}
${ARG_UNPARSED_ARGUMENTS})
endfunction()

add_arrow_compute_test(internals_test
SOURCES
function_test.cc
exec_test.cc
kernel_test.cc
light_array_test.cc
registry_test.cc
key_hash_test.cc
expression_test.cc)
add_arrow_test(internals_test
${ARROW_COMPUTE_TEST_ARGS}
SOURCES
function_test.cc
exec_test.cc
kernel_test.cc
light_array_test.cc
registry_test.cc
key_hash_test.cc)

add_arrow_compute_test(expression_test SOURCES expression_test.cc)

add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")

Expand Down
59 changes: 10 additions & 49 deletions cpp/src/arrow/compute/exec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,63 +17,24 @@

arrow_install_all_headers("arrow/compute/exec")

add_arrow_compute_test(subtree_test
REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
subtree_test.cc)
add_arrow_compute_test(subtree_test SOURCES subtree_test.cc)

add_arrow_compute_test(plan_test
REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
plan_test.cc
test_nodes_test.cc
test_nodes.cc)
add_arrow_compute_test(fetch_node_test
REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
fetch_node_test.cc
test_nodes.cc)
add_arrow_compute_test(hash_join_node_test
REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
hash_join_node_test.cc
add_arrow_compute_test(fetch_node_test SOURCES fetch_node_test.cc test_nodes.cc)
add_arrow_compute_test(hash_join_node_test SOURCES hash_join_node_test.cc
bloom_filter_test.cc)
add_arrow_compute_test(order_by_node_test
PREFIX
"arrow-compute"
SOURCES
order_by_node_test.cc
add_arrow_compute_test(order_by_node_test SOURCES order_by_node_test.cc test_nodes.cc)
add_arrow_compute_test(pivot_longer_node_test SOURCES pivot_longer_node_test.cc
test_nodes.cc)
add_arrow_compute_test(pivot_longer_node_test
PREFIX
"arrow-compute"
SOURCES
pivot_longer_node_test.cc
test_nodes.cc)
add_arrow_compute_test(asof_join_node_test
REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
asof_join_node_test.cc
test_nodes.cc)
add_arrow_compute_test(tpch_node_test PREFIX "arrow-compute")
add_arrow_compute_test(union_node_test PREFIX "arrow-compute")
add_arrow_compute_test(groupby_test REQUIRE_ALL_KERNELS PREFIX "arrow-compute")
add_arrow_compute_test(util_test
PREFIX
"arrow-compute"
SOURCES
util_test.cc
task_util_test.cc)
add_arrow_compute_test(asof_join_node_test SOURCES asof_join_node_test.cc test_nodes.cc)
add_arrow_compute_test(tpch_node_test)
add_arrow_compute_test(union_node_test)
add_arrow_compute_test(groupby_test)
add_arrow_compute_test(util_test SOURCES util_test.cc task_util_test.cc)

add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")

Expand Down
36 changes: 14 additions & 22 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,40 +16,39 @@
# under the License.

# ----------------------------------------------------------------------
# Scalar kernels
# Tests that don't require the full kernel library

add_arrow_test(scalar_cast_test
${ARROW_COMPUTE_TEST_ARGS}
SOURCES
scalar_cast_test.cc
test_util.cc)

add_arrow_test(kernel_utility_test ${ARROW_COMPUTE_TEST_ARGS} SOURCES
codegen_internal_test.cc)

add_arrow_compute_test(scalar_cast_test SOURCES scalar_cast_test.cc test_util.cc)
# ----------------------------------------------------------------------
# Scalar kernels

add_arrow_compute_test(scalar_type_test
REQUIRE_ALL_KERNELS
SOURCES
scalar_boolean_test.cc
scalar_nested_test.cc
scalar_string_test.cc
test_util.cc)

add_arrow_compute_test(scalar_if_else_test
REQUIRE_ALL_KERNELS
SOURCES
scalar_if_else_test.cc
test_util.cc)
add_arrow_compute_test(scalar_if_else_test SOURCES scalar_if_else_test.cc test_util.cc)

add_arrow_compute_test(scalar_temporal_test
REQUIRE_ALL_KERNELS
SOURCES
scalar_temporal_test.cc
test_util.cc)
add_arrow_compute_test(scalar_temporal_test SOURCES scalar_temporal_test.cc test_util.cc)

add_arrow_compute_test(scalar_math_test
REQUIRE_ALL_KERNELS
SOURCES
scalar_arithmetic_test.cc
scalar_compare_test.cc
scalar_round_arithmetic_test.cc
test_util.cc)

add_arrow_compute_test(scalar_utility_test
REQUIRE_ALL_KERNELS
SOURCES
scalar_random_test.cc
scalar_set_lookup_test.cc
Expand All @@ -71,7 +70,6 @@ add_arrow_benchmark(scalar_temporal_benchmark PREFIX "arrow-compute")
# Vector kernels

add_arrow_compute_test(vector_test
REQUIRE_ALL_KERNELS
SOURCES
vector_cumulative_ops_test.cc
vector_hash_test.cc
Expand All @@ -96,14 +94,8 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
# Aggregates

add_arrow_compute_test(aggregate_test
REQUIRE_ALL_KERNELS
SOURCES
aggregate_test.cc
hash_aggregate_test.cc
test_util.cc)
add_arrow_benchmark(aggregate_benchmark PREFIX "arrow-compute")

# ----------------------------------------------------------------------
# Utilities

add_arrow_compute_test(kernel_utility_test SOURCES codegen_internal_test.cc)
9 changes: 9 additions & 0 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,15 @@ tasks:
UBUNTU: 20.04
image: ubuntu-cpp-thread-sanitizer

# Task that runs the ubuntu-cpp-minimal image on Ubuntu 20.04 through the
# docker-tests GitHub template, with CSV and Parquet switched on.
test-ubuntu-20.04-cpp-minimal-with-formats:
ci: github
template: docker-tests/github.linux.yml
params:
env:
UBUNTU: 20.04
# The "-e" options set ARROW_CSV and ARROW_PARQUET to ON for this run;
# presumably they override the image's own defaults — confirm in the template.
flags: "-e ARROW_CSV=ON -e ARROW_PARQUET=ON"
image: ubuntu-cpp-minimal

{% for python_version in ["3.7", "3.8", "3.9", "3.10", "3.11"] %}
test-conda-python-{{ python_version }}:
ci: github
Expand Down
43 changes: 43 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ x-hierarchy:
- ubuntu-r
- ubuntu-r-only-r
- ubuntu-cpp-bundled
- ubuntu-cpp-minimal
- ubuntu-cuda-cpp:
- ubuntu-cuda-python
- ubuntu-csharp
Expand Down Expand Up @@ -467,6 +468,48 @@ services:
volumes: *ubuntu-volumes
command: *cpp-command

ubuntu-cpp-minimal:
# Arrow build with minimal components/dependencies
# The image tag and the dockerfile path are both derived from ${UBUNTU}, so
# that variable selects which ubuntu-*-cpp-minimal.dockerfile is built.
image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp-minimal
build:
context: .
dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp-minimal.dockerfile
# Use the image of the same name as the build cache source.
cache_from:
- ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp-minimal
args:
arch: ${ARCH}
base: "${ARCH}/ubuntu:${UBUNTU}"
llvm: ${LLVM}
shm_size: *shm-size
ulimits: *ulimits
environment:
# Merge in the shared ccache/sccache settings before the per-service values.
<<: [*ccache, *sccache]
# Every Arrow component listed below is explicitly set to "OFF".
ARROW_BUILD_UTILITIES: "OFF"
ARROW_COMPUTE: "OFF"
ARROW_CSV: "OFF"
ARROW_DATASET: "OFF"
ARROW_FILESYSTEM: "OFF"
ARROW_FLIGHT: "OFF"
ARROW_GANDIVA: "OFF"
ARROW_GCS: "OFF"
ARROW_HDFS: "OFF"
ARROW_ORC: "OFF"
ARROW_PARQUET: "OFF"
ARROW_PLASMA: "OFF"
ARROW_S3: "OFF"
ARROW_SUBSTRAIT: "OFF"
# Compression codecs are all set to "OFF" as well.
ARROW_WITH_BROTLI: "OFF"
ARROW_WITH_BZ2: "OFF"
ARROW_WITH_LZ4: "OFF"
ARROW_WITH_SNAPPY: "OFF"
ARROW_WITH_ZLIB: "OFF"
ARROW_WITH_ZSTD: "OFF"
# Parquet extras (examples, executables, encryption) are also set to "OFF".
PARQUET_BUILD_EXAMPLES: "OFF"
PARQUET_BUILD_EXECUTABLES: "OFF"
PARQUET_REQUIRE_ENCRYPTION: "OFF"
volumes: *ubuntu-volumes
command: *cpp-command

ubuntu-cuda-cpp:
# Usage:
# docker-compose build cuda-cpp
Expand Down

0 comments on commit 4487be0

Please sign in to comment.