Skip to content

Commit

Permalink
ARROW-17817: [C++] Let ORC compile on MSVC if it is activated (#14208)
Browse files Browse the repository at this point in the history
Lead-authored-by: LouisClt <louis1110@hotmail.fr>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
LouisClt and kou committed Oct 15, 2022
1 parent f0cf5c2 commit aeba616
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 53 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ jobs:
ARROW_HOME: /usr
ARROW_JEMALLOC: OFF
ARROW_MIMALLOC: ON
ARROW_ORC: ON
ARROW_PARQUET: ON
ARROW_USE_GLOG: OFF
ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
Expand Down Expand Up @@ -286,7 +287,10 @@ jobs:
bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build"
- name: Test
shell: bash
run: ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
run: |
# For ORC
export TZDIR=/c/msys64/usr/share/zoneinfo
ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
windows-mingw:
name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} C++
Expand Down
7 changes: 3 additions & 4 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_HDFS=ON ^
-DARROW_JSON=ON ^
-DARROW_MIMALLOC=ON ^
-DARROW_ORC=ON ^
-DARROW_PARQUET=ON ^
-DARROW_S3=%ARROW_S3% ^
-DARROW_SUBSTRAIT=ON ^
Expand All @@ -93,13 +94,11 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
.. || exit /B
cmake --build . --target install --config Release || exit /B

@rem Needed so arrow-python-test.exe works
set OLD_PYTHONHOME=%PYTHONHOME%
set PYTHONHOME=%CONDA_PREFIX%
@rem For ORC C++
set TZDIR=%CONDA_PREFIX%\share\zoneinfo

ctest --output-on-failure || exit /B

set PYTHONHOME=%OLD_PYTHONHOME%
popd

@rem
Expand Down
9 changes: 3 additions & 6 deletions ci/conda_env_cpp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,19 @@ brotli
bzip2
c-ares
cmake
flatbuffers
gflags
glog
gmock>=1.10.0
google-cloud-cpp>=1.34.0
# 1.45.0 appears to segfault on Windows/AppVeyor
grpc-cpp>=1.27.3,<1.45.0
grpc-cpp
gtest>=1.10.0
libprotobuf
libutf8proc
lz4-c
make
ninja
# Required by google-cloud-cpp, the Conda package is missing the dependency:
# https://github.com/conda-forge/google-cloud-cpp-feedstock/issues/28
nlohmann_json
orc
pkg-config
python
rapidjson
Expand All @@ -46,4 +44,3 @@ thrift-cpp>=0.11.0
xsimd
zlib
zstd
flatbuffers
3 changes: 1 addition & 2 deletions ci/scripts/java_jni_windows_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ install_dir=${build_dir}/cpp-install
: ${ARROW_BUILD_TESTS:=ON}
: ${ARROW_DATASET:=ON}
export ARROW_DATASET
# We can enable this after ARROW-17817 is resolved.
: ${ARROW_ORC:=OFF}
: ${ARROW_ORC:=ON}
export ARROW_ORC
: ${ARROW_PARQUET:=ON}
: ${ARROW_S3:=ON}
Expand Down
2 changes: 0 additions & 2 deletions cpp/cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ endmacro()

macro(resolve_option_dependencies)
if(MSVC_TOOLCHAIN)
# ARROW-17817: ORC can't be built on Windows.
set(ARROW_ORC OFF)
# Plasma using glog is not fully tested on windows.
set(ARROW_USE_GLOG OFF)
endif()
Expand Down
28 changes: 19 additions & 9 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4286,6 +4286,9 @@ macro(build_orc)
get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)

get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)

# Weirdly passing in PROTOBUF_LIBRARY for PROTOC_LIBRARY still results in ORC finding
# the protoc library.
set(ORC_CMAKE_ARGS
Expand All @@ -4305,8 +4308,8 @@ macro(build_orc)
"-DPROTOBUF_INCLUDE_DIR=${ORC_PROTOBUF_INCLUDE_DIR}"
"-DPROTOBUF_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
"-DPROTOC_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
"-DLZ4_HOME=${LZ4_HOME}"
"-DZSTD_HOME=${ZSTD_HOME}")
"-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DZSTD_HOME=${ORZ_ZSTD_ROOT}")
if(ORC_PROTOBUF_EXECUTABLE)
set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS}
"-DPROTOBUF_EXECUTABLE:FILEPATH=${ORC_PROTOBUF_EXECUTABLE}")
Expand All @@ -4322,20 +4325,27 @@ macro(build_orc)
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS})

add_dependencies(toolchain orc_ep)
CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)

set(ORC_VENDORED 1)
add_dependencies(orc_ep ZLIB::ZLIB)
add_dependencies(orc_ep LZ4::lz4)
add_dependencies(orc_ep ${Snappy_TARGET})
add_dependencies(orc_ep ${ARROW_PROTOBUF_LIBPROTOBUF})

add_library(orc::liborc STATIC IMPORTED)
set_target_properties(orc::liborc
PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}")
target_link_libraries(orc::liborc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET})
if(NOT MSVC)
if(NOT APPLE)
target_link_libraries(orc::liborc INTERFACE Threads::Threads)
endif()
target_link_libraries(orc::liborc INTERFACE ${CMAKE_DL_LIBS})
endif()

add_dependencies(toolchain orc_ep)
add_dependencies(orc::liborc orc_ep)
Expand Down
15 changes: 1 addition & 14 deletions cpp/src/arrow/adapters/orc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,14 @@ install(FILES adapter.h options.h
# pkg-config support
arrow_add_pkg_config("arrow-orc")

set(ORC_MIN_TEST_LIBS
GTest::gtest_main
GTest::gtest
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)

if(ARROW_BUILD_STATIC)
set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_static arrow_static)
else()
set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_shared arrow_shared)
endif()

if(APPLE)
set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} ${CMAKE_DL_LIBS})
elseif(NOT MSVC)
set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} pthread ${CMAKE_DL_LIBS})
endif()

set(ORC_STATIC_TEST_LINK_LIBS orc::liborc ${ARROW_LIBRARIES_FOR_STATIC_TESTS}
${ORC_MIN_TEST_LIBS})
GTest::gtest_main GTest::gtest)

add_arrow_test(adapter_test
PREFIX
Expand Down
27 changes: 14 additions & 13 deletions cpp/src/arrow/adapters/orc/adapter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,22 @@ namespace arrow {

using internal::checked_pointer_cast;

constexpr int kDefaultSmallMemStreamSize = 16384 * 5; // 80KB
constexpr int kDefaultMemStreamSize = 10 * 1024 * 1024;
constexpr size_t kDefaultSmallMemStreamSize = 16384 * 5; // 80KB
constexpr size_t kDefaultMemStreamSize = 10 * 1024 * 1024;
constexpr int64_t kNanoMax = std::numeric_limits<int64_t>::max();
constexpr int64_t kNanoMin = std::numeric_limits<int64_t>::lowest();
const int64_t kMicroMax = std::floor(kNanoMax / 1000);
const int64_t kMicroMin = std::ceil(kNanoMin / 1000);
const int64_t kMilliMax = std::floor(kMicroMax / 1000);
const int64_t kMilliMin = std::ceil(kMicroMin / 1000);
const int64_t kSecondMax = std::floor(kMilliMax / 1000);
const int64_t kSecondMin = std::ceil(kMilliMin / 1000);
const int64_t kMicroMax = static_cast<int64_t>(std::floor(kNanoMax / 1000));
const int64_t kMicroMin = static_cast<int64_t>(std::ceil(kNanoMin / 1000));
const int64_t kMilliMax = static_cast<int64_t>(std::floor(kMicroMax / 1000));
const int64_t kMilliMin = static_cast<int64_t>(std::ceil(kMicroMin / 1000));
const int64_t kSecondMax = static_cast<int64_t>(std::floor(kMilliMax / 1000));
const int64_t kSecondMin = static_cast<int64_t>(std::ceil(kMilliMin / 1000));

static constexpr random::SeedType kRandomSeed = 0x0ff1ce;

class MemoryOutputStream : public liborc::OutputStream {
public:
explicit MemoryOutputStream(ssize_t capacity)
explicit MemoryOutputStream(size_t capacity)
: data_(capacity), name_("MemoryOutputStream"), length_(0) {}

uint64_t getLength() const override { return length_; }
Expand Down Expand Up @@ -86,12 +86,13 @@ class MemoryOutputStream : public liborc::OutputStream {
std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
int64_t length) {
BufferBuilder builder;
int32_t offsets[length];
std::vector<int32_t> offsets;
offsets.resize(length);
ARROW_EXPECT_OK(builder.Resize(4 * length));
for (int32_t i = 0; i < length; i++) {
offsets[i] = fixed_length * i;
for (int64_t i = 0; i < length; i++) {
offsets[i] = static_cast<int32_t>(fixed_length * i);
}
ARROW_EXPECT_OK(builder.Append(offsets, 4 * length));
ARROW_EXPECT_OK(builder.Append(offsets.data(), 4 * length));
std::shared_ptr<Buffer> buffer;
ARROW_EXPECT_OK(builder.Finish(&buffer));
return buffer;
Expand Down
5 changes: 3 additions & 2 deletions dev/tasks/java-jars/github.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ jobs:
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
REM For ORC
set TZDIR=/c/msys64/usr/share/zoneinfo
bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist"
- name: Compress into single artifact to keep directory structure
shell: bash
Expand Down Expand Up @@ -148,8 +150,7 @@ jobs:
test -f arrow/java-dist/arrow_dataset_jni.dll
test -f arrow/java-dist/libarrow_orc_jni.dylib
test -f arrow/java-dist/libarrow_orc_jni.so
# We can enable this after ARROW-17817 is resolved.
# test -f arrow/java-dist/arrow_orc_jni.dll
test -f arrow/java-dist/arrow_orc_jni.dll
test -f arrow/java-dist/libgandiva_jni.dylib
test -f arrow/java-dist/libgandiva_jni.so
test -f arrow/java-dist/libplasma_java.dylib
Expand Down

0 comments on commit aeba616

Please sign in to comment.