Skip to content

Commit

Permalink
ARROW-16340: [C++][Python] Move all Python related code into PyArrow (#…
Browse files Browse the repository at this point in the history
…13311)

This PR moves the `src/arrow/python` directory into `pyarrow` and arranges for PyArrow to build it. The build on the Python side is done in two steps:

1. `_run_cmake_pyarrow_cpp()`, where the C++ part of pyarrow is built first (the part that was moved in this refactoring)
2. `_run_cmake()` where pyarrow is built as before

No changes are needed in the build process from the user side to successfully build pyarrow after this refactoring. The tests for PyArrow C++ will, however, be moved into Cython; for now they can be run with:

```shell
>>> pushd python/build/dist/temp 
>>> ctest
```

Lead-authored-by: Alenka Frim <frim.alenka@gmail.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
AlenkaF and jorisvandenbossche committed Aug 26, 2022
1 parent 7e7b8e1 commit b832853
Show file tree
Hide file tree
Showing 103 changed files with 862 additions and 650 deletions.
11 changes: 11 additions & 0 deletions ci/scripts/python_test.sh
Expand Up @@ -20,6 +20,7 @@
set -ex

arrow_dir=${1}
test_dir=${1}/python/build/dist

export ARROW_SOURCE_DIR=${arrow_dir}
export ARROW_TEST_DATA=${arrow_dir}/testing/data
Expand Down Expand Up @@ -54,4 +55,14 @@ export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
export PYARROW_TEST_S3

# Testing PyArrow C++
if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
pushd ${test_dir}
ctest \
--output-on-failure \
--parallel ${n_jobs} \
--timeout 300
popd
fi
# Testing PyArrow
pytest -r s ${PYTEST_ARGS} --pyargs pyarrow
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_macos_build.sh
Expand Up @@ -156,6 +156,7 @@ export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_WITH_S3=${ARROW_S3}
export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
export ARROW_HOME=${build_dir}/install
# PyArrow build configuration
export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig
# Set PyArrow version explicitly
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_manylinux_build.sh
Expand Up @@ -151,6 +151,7 @@ export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION}
export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_WITH_S3=${ARROW_S3}
export ARROW_HOME=/tmp/arrow-dist
# PyArrow build configuration
export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig

Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake_modules/FindArrowPython.cmake
Expand Up @@ -47,7 +47,7 @@ find_package(Arrow ${find_package_arguments})

if(ARROW_FOUND)
arrow_find_package(ARROW_PYTHON
"${ARROW_HOME}"
"${PYARROW_CPP_HOME}"
arrow_python
arrow/python/api.h
ArrowPython
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake_modules/FindArrowPythonFlight.cmake
Expand Up @@ -50,7 +50,7 @@ find_package(ArrowPython ${find_package_arguments})

if(ARROW_PYTHON_FOUND AND ARROW_FLIGHT_FOUND)
arrow_find_package(ARROW_PYTHON_FLIGHT
"${ARROW_HOME}"
"${PYARROW_CPP_HOME}"
arrow_python_flight
arrow/python/flight.h
ArrowPythonFlight
Expand Down
4 changes: 0 additions & 4 deletions cpp/src/arrow/CMakeLists.txt
Expand Up @@ -805,10 +805,6 @@ if(ARROW_ORC)
add_subdirectory(adapters/orc)
endif()

if(ARROW_PYTHON)
add_subdirectory(python)
endif()

if(ARROW_TENSORFLOW)
add_subdirectory(adapters/tensorflow)
endif()
4 changes: 0 additions & 4 deletions cpp/src/arrow/public_api_test.cc
Expand Up @@ -50,10 +50,6 @@
#include "arrow/json/api.h" // IWYU pragma: keep
#endif

#ifdef ARROW_PYTHON
#include "arrow/python/api.h" // IWYU pragma: keep
#endif

#ifdef DCHECK
#error "DCHECK should not be visible from Arrow public headers."
#endif
Expand Down
208 changes: 0 additions & 208 deletions cpp/src/arrow/python/CMakeLists.txt

This file was deleted.

7 changes: 7 additions & 0 deletions dev/release/01-prepare-test.rb
Expand Up @@ -197,6 +197,13 @@ def test_version_pre_tag
"+set(MLARROW_VERSION \"#{@release_version}\")"],
],
},
{
path: "python/pyarrow/src/CMakeLists.txt",
hunks: [
["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")",
"+set(ARROW_PYTHON_VERSION \"#{@release_version}\")"],
],
},
{
path: "python/setup.py",
hunks: [
Expand Down
7 changes: 7 additions & 0 deletions dev/release/post-11-bump-versions-test.rb
Expand Up @@ -144,6 +144,13 @@ def test_version_post_tag
"+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"],
],
},
{
path: "python/pyarrow/src/CMakeLists.txt",
hunks: [
["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")",
"+set(ARROW_PYTHON_VERSION \"#{@next_snapshot_version}\")"],
],
},
{
path: "python/setup.py",
hunks: [
Expand Down
4 changes: 0 additions & 4 deletions dev/release/rat_exclude_files.txt
Expand Up @@ -98,10 +98,6 @@ dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base
dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links
dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1000.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight1000.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python1000.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow1000.install
dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install
dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install
Expand Down
8 changes: 8 additions & 0 deletions dev/release/utils-prepare.sh
Expand Up @@ -121,6 +121,14 @@ update_versions() {
git add setup.py
popd

pushd "${ARROW_DIR}/python/pyarrow/src"
sed -i.bak -E -e \
"s/^set\(ARROW_PYTHON_VERSION \".+\"\)/set(ARROW_PYTHON_VERSION \"${version}\")/" \
CMakeLists.txt
rm -f CMakeLists.txt.bak
git add CMakeLists.txt
popd

pushd "${ARROW_DIR}/r"
sed -i.bak -E -e \
"s/^Version: .+/Version: ${r_version}/" \
Expand Down
7 changes: 0 additions & 7 deletions dev/release/verify-apt.sh
Expand Up @@ -198,13 +198,6 @@ ruby -r gi -e "p GI.load('ArrowFlightSQL')"
echo "::endgroup::"


if [ "${have_python}" = "yes" ]; then
echo "::group::Test libarrow-python"
${APT_INSTALL} libarrow-python-dev=${package_version}
echo "::endgroup::"
fi


if [ "${have_plasma}" = "yes" ]; then
echo "::group::Test Plasma"
${APT_INSTALL} libplasma-glib-dev=${package_version}
Expand Down
6 changes: 0 additions & 6 deletions dev/release/verify-yum.sh
Expand Up @@ -250,12 +250,6 @@ if [ "${have_flight}" = "yes" ]; then
echo "::endgroup::"
fi

if [ "${have_python}" = "yes" ]; then
echo "::group::Test libarrow-python"
${install_command} --enablerepo=epel arrow-python-devel-${package_version}
echo "::endgroup::"
fi

echo "::group::Test Plasma"
if [ "${have_glib}" = "yes" ]; then
${install_command} --enablerepo=epel plasma-glib-devel-${package_version}
Expand Down

0 comments on commit b832853

Please sign in to comment.