Skip to content

Commit

Permalink
ARROW-7937: [Python][Packaging] Remove boost from the macos wheels
Browse files Browse the repository at this point in the history
Closes #6485 from kszucs/remove-boost-from-macos-wheels and squashes the following commits:

89d4a8c <Krisztián Szűcs> remove comments
0bc3a18 <Krisztián Szűcs> don't skip parquet tests if cloudpickle is not available
82572e6 <Krisztián Szűcs> cflags
61eb361 <Krisztián Szűcs> try to remove boost from the macos wheels

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
  • Loading branch information
kszucs committed Feb 26, 2020
1 parent 396861b commit cb65a91
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 76 deletions.
69 changes: 2 additions & 67 deletions dev/tasks/python-wheels/osx-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,65 +37,10 @@ function build_wheel {

pushd $1

boost_version="1.66.0"
boost_directory_name="boost_${boost_version//\./_}"
boost_tarball_name="${boost_directory_name}.tar.gz"
wget -nv --no-check-certificate \
https://downloads.sourceforge.net/project/boost/boost/"${boost_version}"/"${boost_tarball_name}" \
-O "${boost_tarball_name}"
tar xf "${boost_tarball_name}"

arrow_boost="$PWD/arrow_boost"
arrow_boost_dist="$PWD/arrow_boost_dist"
mkdir "$arrow_boost" "$arrow_boost_dist"

# Arrow is 64-bit-only at the moment
export CFLAGS="-fPIC -arch x86_64 ${CFLAGS//"-arch i386"/}"
export CXXFLAGS="-fPIC -arch x86_64 ${CXXFLAGS//"-arch i386"} -std=c++11"

# Build Boost's bcp tool to create a custom namespaced boost build.
# Using this build, we can dynamically link our own boost build and
# don't need to fear any clashes with system / thirdparty provided versions
# of Boost.
pushd "${boost_directory_name}"
./bootstrap.sh
./b2 tools/bcp > /dev/null 2>&1
./dist/bin/bcp --namespace=arrow_boost --namespace-alias \
filesystem date_time system regex build algorithm locale format \
multiprecision/cpp_int "$arrow_boost" > /dev/null 2>&1
popd

# Now build our custom namespaced Boost version.
pushd "$arrow_boost"
./bootstrap.sh
./bjam cxxflags="${CXXFLAGS}" \
linkflags="-std=c++11" \
cflags="${CFLAGS}" \
variant=release \
link=shared \
--prefix="$arrow_boost_dist" \
--with-filesystem --with-date_time --with-system --with-regex \
install > /dev/null 2>&1
popd

# The boost libraries don't set an explicit install name and we have not
# yet found the correct option on `bjam` to set the install name to the
# one we desire.
#
# Set it to @rpath/<binary_name> so that they are searched for in the same
# directory as the library that loaded them.
pushd "${arrow_boost_dist}"/lib
for dylib in *.dylib; do
install_name_tool -id @rpath/${dylib} ${dylib}
done
# Manually adjust libarrow_boost_filesystem.dylib which also references
# libarrow_boost_system.dylib. Its reference should be to the
# libarrow_boost_system.dylib with an @rpath prefix so that it also
# searches for it in the local folder.
install_name_tool -change libarrow_boost_system.dylib @rpath/libarrow_boost_system.dylib libarrow_boost_filesystem.dylib
popd

# Now we can start with the actual build of Arrow and Parquet.
# We pin NumPy to an old version here because the NumPy version a wheel is
# built with becomes the oldest version it supports. Thanks to NumPy's
# guarantees our Arrow build will also work with newer NumPy versions.
Expand All @@ -113,8 +58,8 @@ function build_wheel {
pushd cpp
mkdir build
pushd build
cmake -DARROW_BOOST_USE_SHARED=ON \
-DARROW_BUILD_SHARED=ON \

cmake -DARROW_BUILD_SHARED=ON \
-DARROW_BUILD_TESTS=OFF \
-DARROW_DATASET=ON \
-DARROW_DEPENDENCY_SOURCE=BUNDLED \
Expand All @@ -135,10 +80,6 @@ function build_wheel {
-DARROW_WITH_SNAPPY=ON \
-DARROW_WITH_ZLIB=ON \
-DARROW_WITH_ZSTD=ON \
-DBoost_NAMESPACE=arrow_boost \
-DBoost_NO_BOOST_CMAKE=ON \
-DBOOST_ROOT="$arrow_boost_dist" \
-DBOOST_SOURCE=SYSTEM \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DgRPC_SOURCE=SYSTEM \
Expand Down Expand Up @@ -167,14 +108,8 @@ function build_wheel {
export PYARROW_WITH_ORC=0
export PYARROW_WITH_JEMALLOC=1
export PYARROW_WITH_PLASMA=1
export PYARROW_BUNDLE_BOOST=1
export PYARROW_BUNDLE_ARROW_CPP=1
export PYARROW_BUILD_TYPE='release'
export PYARROW_BOOST_NAMESPACE='arrow_boost'
PYARROW_CMAKE_OPTIONS=""
PYARROW_CMAKE_OPTIONS="${PYARROW_CMAKE_OPTIONS} -DBOOST_ROOT=$arrow_boost_dist"
PYARROW_CMAKE_OPTIONS="${PYARROW_CMAKE_OPTIONS} -DBoost_NO_BOOST_CMAKE=ON"
export PYARROW_CMAKE_OPTIONS
export SETUPTOOLS_SCM_PRETEND_VERSION=$PYARROW_VERSION
pushd python
python setup.py build_ext bdist_wheel
Expand Down
23 changes: 14 additions & 9 deletions python/pyarrow/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import io
import json
import os
import pickle
import pytest

import numpy as np
Expand All @@ -32,6 +31,7 @@
from pyarrow.tests import util
from pyarrow.filesystem import LocalFileSystem, FileSystem


try:
import pyarrow.parquet as pq
except ImportError:
Expand Down Expand Up @@ -2797,17 +2797,10 @@ def _make_dataset_for_pickling(tempdir, N=100):
return dataset


@pytest.mark.pandas
@pytest.mark.parametrize('pickler', [
pytest.param(pickle, id='builtin'),
pytest.param(pytest.importorskip('cloudpickle'), id='cloudpickle')
])
def test_pickle_dataset(tempdir, datadir, pickler):
def _assert_dataset_is_picklable(dataset, pickler):
def is_pickleable(obj):
return obj == pickler.loads(pickler.dumps(obj))

dataset = _make_dataset_for_pickling(tempdir)

assert is_pickleable(dataset)
assert is_pickleable(dataset.metadata)
assert is_pickleable(dataset.metadata.schema)
Expand All @@ -2823,6 +2816,18 @@ def is_pickleable(obj):
assert is_pickleable(metadata.row_group(i))


def test_builtin_pickle_dataset(tempdir, datadir):
    """Check that a freshly written dataset round-trips through ``pickle``.

    The standard-library pickler is imported locally and handed to the
    shared ``_assert_dataset_is_picklable`` helper.
    """
    import pickle as builtin_pickler
    _assert_dataset_is_picklable(
        _make_dataset_for_pickling(tempdir),
        pickler=builtin_pickler,
    )


def test_cloudpickle_dataset(tempdir, datadir):
    """Check that a freshly written dataset round-trips through cloudpickle.

    ``pytest.importorskip`` skips only this test when cloudpickle is not
    installed.
    """
    cloudpickle_module = pytest.importorskip('cloudpickle')
    _assert_dataset_is_picklable(
        _make_dataset_for_pickling(tempdir),
        pickler=cloudpickle_module,
    )


@pytest.mark.pandas
def test_decimal_roundtrip(tempdir):
num_values = 10
Expand Down

0 comments on commit cb65a91

Please sign in to comment.