Skip to content

Commit

Permalink
Merge branch 'master' of github.com:apache/arrow into js-cpp-refactor…
Browse files Browse the repository at this point in the history
…-merge_with-table-scan-perf
  • Loading branch information
trxcllnt committed Jan 19, 2018
2 parents d2b18d5 + bc9f9e5 commit 6c91ed4
Show file tree
Hide file tree
Showing 11 changed files with 161 additions and 50 deletions.
7 changes: 6 additions & 1 deletion c_glib/arrow-glib/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

CLEANFILES =
DISTCLEANFILES =

EXTRA_DIST = \
meson.build
Expand Down Expand Up @@ -169,6 +170,10 @@ BUILT_SOURCES = \
stamp-enums.c \
stamp-enums.h

DISTCLEANFILES += \
stamp-enums.c \
stamp-enums.h

EXTRA_DIST += \
enums.c.template \
enums.h.template
Expand Down Expand Up @@ -214,7 +219,7 @@ INTROSPECTION_SCANNER_ARGS =
INTROSPECTION_SCANNER_ENV =
if USE_ARROW_BUILD_DIR
INTROSPECTION_SCANNER_ENV += \
LD_LIBRARY_PATH=$(ARROW_LIB_DIR):$${PKG_CONFIG_PATH}
LD_LIBRARY_PATH=$(ARROW_LIB_DIR):$${LD_LIBRARY_PATH}
endif
if OS_MACOS
INTROSPECTION_SCANNER_ENV += \
Expand Down
4 changes: 4 additions & 0 deletions cpp/apidoc/HDFS.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob`
* `ARROW_LIBHDFS_DIR` (optional): explicit location of `libhdfs.so` if it is
installed somewhere other than `$HADOOP_HOME/lib/native`.

To accommodate distribution-specific nuances, the `JAVA_HOME` variable may be
set to the root path for the Java SDK, the JRE path itself, or to the directory
containing the `libjvm` library.

### Mac Specifics

The installed location of Java on OS X can vary, however the following snippet
Expand Down
18 changes: 16 additions & 2 deletions cpp/cmake_modules/FindParquet.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,22 @@ if(PARQUET_HOME)
PATHS ${PARQUET_HOME} NO_DEFAULT_PATH
PATH_SUFFIXES "lib")
get_filename_component(PARQUET_LIBS ${PARQUET_LIBRARIES} PATH )
set(PARQUET_ABI_VERSION "1.0.0")
set(PARQUET_SO_VERSION "1")

# Try to autodiscover the Parquet ABI version from the extension of the
# resolved (symlink-free) library path, e.g. ".so.1.0.0" or ".1.0.0.dylib".
# Falls back to ABI version 1.0.0 / SO version 1 when no version is found.
get_filename_component(PARQUET_LIB_REALPATH ${PARQUET_LIBRARIES} REALPATH)
get_filename_component(PARQUET_EXT_REALPATH ${PARQUET_LIB_REALPATH} EXT)
# Dots are escaped so they only match a literal '.', and the inputs are quoted
# so that an empty extension is still passed as an (empty) argument.
string(REGEX MATCH "\\.([0-9]+\\.[0-9]+\\.[0-9]+)" HAS_ABI_VERSION "${PARQUET_EXT_REALPATH}")
if (HAS_ABI_VERSION)
  if (APPLE)
    # macOS places the version before the suffix: .<major>.<minor>.<patch>.dylib
    string(REGEX REPLACE "\\.([0-9]+\\.[0-9]+\\.[0-9]+)\\.dylib" "\\1" PARQUET_ABI_VERSION "${PARQUET_EXT_REALPATH}")
  else()
    # Elsewhere the version follows the suffix: .so.<major>.<minor>.<patch>
    string(REGEX REPLACE "\\.so\\.([0-9]+\\.[0-9]+\\.[0-9]+)" "\\1" PARQUET_ABI_VERSION "${PARQUET_EXT_REALPATH}")
  endif()
  # The SO version is the leading (major) component of the ABI version
  string(REGEX REPLACE "([0-9]+)\\.[0-9]+\\.[0-9]+" "\\1" PARQUET_SO_VERSION "${PARQUET_ABI_VERSION}")
else()
  set(PARQUET_ABI_VERSION "1.0.0")
  set(PARQUET_SO_VERSION "1")
endif()
else()
pkg_check_modules(PARQUET parquet)
if (PARQUET_FOUND)
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/arrow/buffer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,29 @@ TEST(TestBuffer, SliceMutableBuffer) {
ASSERT_TRUE(slice->Equals(expected));
}

// Checks the capacity reported by BufferBuilder after Append, after Resize
// (with and without shrink_to_fit) and after Reserve.
TEST(TestBufferBuilder, ResizeReserve) {
  const std::string payload = "some data";
  const int64_t payload_length = static_cast<int64_t>(payload.size());  // 9 bytes

  BufferBuilder builder;

  // Appending the payload makes the builder report its length
  ASSERT_OK(builder.Append(payload.c_str(), payload_length));
  ASSERT_EQ(payload_length, builder.length());

  // Growing to 128 bytes yields exactly that capacity
  ASSERT_OK(builder.Resize(128));
  ASSERT_EQ(128, builder.capacity());

  // With shrink_to_fit disabled, a smaller request keeps the capacity at 128
  ASSERT_OK(builder.Resize(64, false));
  ASSERT_EQ(128, builder.capacity());

  // With the default (shrink_to_fit enabled), the capacity drops to 64
  ASSERT_OK(builder.Resize(64));
  ASSERT_EQ(64, builder.capacity());

  // Reserving 60 more bytes on top of the 9 already appended no longer fits
  // in 64 bytes, so the capacity is expected to grow to 128
  ASSERT_OK(builder.Reserve(60));
  ASSERT_EQ(128, builder.capacity());
}

} // namespace arrow
41 changes: 30 additions & 11 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@
#include <string>
#include <type_traits>

#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

class MemoryPool;

// ----------------------------------------------------------------------
// Buffer classes

/// Immutable API for a chunk of bytes which may or may not be owned by the
/// class instance.
/// \class Buffer
/// \brief Object containing a pointer to a piece of contiguous memory with a
/// particular size. Base class does not own its memory
///
/// Buffers have two related notions of length: size and capacity. Size is
/// the number of bytes that might have valid data. Capacity is the number
Expand Down Expand Up @@ -133,7 +133,8 @@ ARROW_EXPORT
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset, const int64_t length);

/// A Buffer whose contents can be mutated. May or may not own its data.
/// \class MutableBuffer
/// \brief A Buffer whose contents can be mutated. May or may not own its data.
class ARROW_EXPORT MutableBuffer : public Buffer {
public:
MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
Expand All @@ -148,6 +149,8 @@ class ARROW_EXPORT MutableBuffer : public Buffer {
MutableBuffer() : Buffer(NULLPTR, 0) {}
};

/// \class ResizableBuffer
/// \brief A mutable buffer that can be resized
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
public:
/// Change buffer reported size to indicated size, allocating memory if
Expand Down Expand Up @@ -190,13 +193,22 @@ class ARROW_EXPORT PoolBuffer : public ResizableBuffer {
MemoryPool* pool_;
};

/// \class BufferBuilder
/// \brief A class for incrementally building a contiguous chunk of in-memory data
class ARROW_EXPORT BufferBuilder {
public:
explicit BufferBuilder(MemoryPool* pool)
explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
: pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}

/// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md
Status Resize(const int64_t elements) {
/// \brief Resizes the buffer to the nearest multiple of 64 bytes
///
/// \param elements the new capacity of the builder. Will be rounded
/// up to a multiple of 64 bytes for padding
/// \param shrink_to_fit if new capacity smaller than existing size,
/// reallocate internal buffer. Set to false to avoid reallocations when
/// shrinking the builder
/// \return Status
Status Resize(const int64_t elements, bool shrink_to_fit = true) {
// Resize(0) is a no-op
if (elements == 0) {
return Status::OK();
Expand All @@ -205,7 +217,7 @@ class ARROW_EXPORT BufferBuilder {
buffer_ = std::make_shared<PoolBuffer>(pool_);
}
int64_t old_capacity = capacity_;
RETURN_NOT_OK(buffer_->Resize(elements));
RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit));
capacity_ = buffer_->capacity();
data_ = buffer_->mutable_data();
if (capacity_ > old_capacity) {
Expand All @@ -214,7 +226,14 @@ class ARROW_EXPORT BufferBuilder {
return Status::OK();
}

Status Append(const uint8_t* data, int64_t length) {
/// \brief Make room for an additional number of bytes so that they can be
/// accommodated without the need to perform further allocations
///
/// Requests a capacity of the current size plus `size` from Resize() with
/// shrink_to_fit disabled.
///
/// \param size number of additional bytes to make space for
/// \return Status
Status Reserve(const int64_t size) {
  const int64_t required_capacity = size_ + size;
  return Resize(required_capacity, /*shrink_to_fit=*/false);
}

Status Append(const void* data, int64_t length) {
if (capacity_ < length + size_) {
int64_t new_capacity = BitUtil::NextPower2(length + size_);
RETURN_NOT_OK(Resize(new_capacity));
Expand Down Expand Up @@ -248,7 +267,7 @@ class ARROW_EXPORT BufferBuilder {
}

// Unsafe methods don't check existing size
void UnsafeAppend(const uint8_t* data, int64_t length) {
void UnsafeAppend(const void* data, int64_t length) {
memcpy(data_ + size_, data, static_cast<size_t>(length));
size_ += length;
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/io/hdfs-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ static std::vector<fs::path> get_potential_libjvm_paths() {
file_name = "jvm.dll";
#elif __APPLE__
search_prefixes = {""};
search_suffixes = {"", "/jre/lib/server"};
search_suffixes = {"", "/jre/lib/server", "/lib/server"};
file_name = "libjvm.dylib";

// SFrame uses /usr/libexec/java_home to find JAVA_HOME; for now we are
Expand Down Expand Up @@ -175,7 +175,7 @@ static std::vector<fs::path> get_potential_libjvm_paths() {
"/usr/lib/jvm/default", // alt centos
"/usr/java/latest", // alt centos
};
search_suffixes = {"/jre/lib/amd64/server"};
search_suffixes = {"", "/jre/lib/amd64/server", "/lib/amd64/server"};
file_name = "libjvm.so";
#endif
// From direct environment variable
Expand Down
4 changes: 2 additions & 2 deletions dev/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
version: '3'
services:
gen_apidocs:
build:
build:
context: gen_apidocs
volumes:
- ../..:/apache-arrow
Expand All @@ -29,7 +29,7 @@ services:
volumes:
- ../..:/apache-arrow
dask_integration:
build:
build:
context: dask_integration
volumes:
- ../..:/apache-arrow
20 changes: 13 additions & 7 deletions dev/gen_apidocs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM ubuntu:14.04
# Prerequisites for apt-add-repository
RUN apt-get update && apt-get install -y \
software-properties-common python-software-properties
FROM ubuntu:16.04

# Basic OS dependencies
RUN apt-add-repository -y ppa:ubuntu-toolchain-r/test && \
apt-get update && apt-get install -y \
RUN apt-get update && apt-get install -y \
wget \
rsync \
git \
gcc-4.9 \
g++-4.9 \
build-essential
build-essential \
software-properties-common

# Java build fails with default JDK8
RUN add-apt-repository ppa:openjdk-r/ppa &&\
apt-get update &&\
apt-get install -y openjdk-7-jdk &&\
update-java-alternatives -s java-1.7.0-openjdk-amd64

# This will install conda in /home/ubuntu/miniconda
RUN wget -O /tmp/miniconda.sh \
https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
Expand Down Expand Up @@ -73,6 +78,7 @@ RUN /home/ubuntu/miniconda/bin/conda create -y -q -n pyarrow-dev \
doxygen \
maven \
-c conda-forge

ADD . /apache-arrow
WORKDIR /apache-arrow
CMD arrow/dev/gen_apidocs/create_documents.sh
73 changes: 48 additions & 25 deletions dev/gen_apidocs/create_documents.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export ARROW_HOME=$(pwd)/dist
export PARQUET_HOME=$(pwd)/dist
CONDA_BASE=/home/ubuntu/miniconda
export LD_LIBRARY_PATH=$(pwd)/dist/lib:${CONDA_BASE}/lib:${LD_LIBRARY_PATH}
export PKG_CONFIG_PATH=$(pwd)/dist/lib/pkgconfig:${PKG_CONFIG_PATH}
export PATH=${CONDA_BASE}/bin:${PATH}

# Prepare the asf-site before copying api docs
Expand All @@ -38,16 +39,38 @@ git clone --branch=asf-site \
https://git-wip-us.apache.org/repos/asf/arrow-site.git asf-site
popd

# Make Java documentation
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
wget http://mirrors.gigenet.com/apache/maven/maven-3/3.5.2/binaries/apache-maven-3.5.2-bin.tar.gz
tar xvf apache-maven-3.5.2-bin.tar.gz
export PATH=$(pwd)/apache-maven-3.5.2/bin:$PATH

pushd arrow/java
rm -rf target/site/apidocs/*
mvn -Drat.skip=true install
mvn -Drat.skip=true site
mkdir -p ../site/asf-site/docs/java/
rsync -r target/site/apidocs/ ../site/asf-site/docs/java/
popd

# Make Python documentation (Depends on C++ )
# Build Arrow C++
source activate pyarrow-dev

export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX
export BOOST_ROOT=$CONDA_PREFIX
export PARQUET_BUILD_TOOLCHAIN=$CONDA_PREFIX
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:${LD_LIBRARY_PATH}
export PKG_CONFIG_PATH=$CONDA_PREFIX/lib/pkgconfig:${PKG_CONFIG_PATH}

export CC=gcc-4.9
export CXX=g++-4.9

rm -rf arrow/cpp/build_docs
mkdir arrow/cpp/build_docs
pushd arrow/cpp/build_docs
CPP_BUILD_DIR=$(pwd)/arrow/cpp/build_docs

rm -rf $CPP_BUILD_DIR
mkdir $CPP_BUILD_DIR
pushd $CPP_BUILD_DIR
cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DARROW_PYTHON=on \
Expand All @@ -58,6 +81,28 @@ make -j4
make install
popd

# Build c_glib documentation
# Configures and builds arrow/c_glib out-of-tree in build_docs with gtk-doc
# enabled, then copies the generated HTML reference into the asf-site checkout.
pushd arrow/c_glib
if [ -f Makefile ]; then
  # A previous in-tree configure exists: clean it up first.
  # Ensure updating to prevent auto re-configure
  touch configure **/Makefile
  make distclean
  # Work around for 'make distclean' removes doc/reference/xml/
  git checkout doc/reference/xml
fi
./autogen.sh
rm -rf build_docs
mkdir build_docs
pushd build_docs
# Use the ARROW_HOME exported at the top of this script as the prefix.
# (It was misspelled AROW_HOME, which expanded to an empty --prefix=.)
../configure \
  --prefix=${ARROW_HOME} \
  --enable-gtk-doc
make -j4 GTK_DOC_V_XREF=": "
mkdir -p ../../site/asf-site/docs/c_glib
rsync -r doc/reference/html/ ../../site/asf-site/docs/c_glib
popd
popd

# Build Parquet C++
rm -rf parquet-cpp/build_docs
mkdir parquet-cpp/build_docs
Expand All @@ -83,32 +128,10 @@ mkdir -p ../site/asf-site/docs/python
rsync -r doc/_build/html/ ../site/asf-site/docs/python
popd

# Build c_glib documentation
pushd arrow/c_glib
rm -rf doc/reference/html/*
./autogen.sh
./configure \
--with-arrow-cpp-build-dir=$(pwd)/../cpp/build \
--with-arrow-cpp-build-type=$ARROW_BUILD_TYPE \
--enable-gtk-doc
LD_LIBRARY_PATH=$(pwd)/../cpp/build/$ARROW_BUILD_TYPE make GTK_DOC_V_XREF=": "
mkdir -p ../site/asf-site/docs/c_glib
rsync -r doc/reference/html/ ../site/asf-site/docs/c_glib
popd

# Make C++ documentation
pushd arrow/cpp/apidoc
rm -rf html/*
doxygen Doxyfile
mkdir -p ../../site/asf-site/docs/cpp
rsync -r html/ ../../site/asf-site/docs/cpp
popd

# Make Java documentation
pushd arrow/java
rm -rf target/site/apidocs/*
mvn -Drat.skip=true install
mvn -Drat.skip=true site
mkdir -p ../site/asf-site/docs/java/
rsync -r target/site/apidocs/ ../site/asf-site/docs/java/
popd

0 comments on commit 6c91ed4

Please sign in to comment.