From 3ede47cea09f9ffe6d0e85331e6dbd0e26848216 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 16 Aug 2018 09:55:24 +0200 Subject: [PATCH 1/4] PARQUET-1382: [C++] Prepare for arrow::test namespace removal --- src/parquet/arrow/arrow-reader-writer-test.cc | 47 +++++-------------- src/parquet/arrow/test-util.h | 28 +++++++---- 2 files changed, 30 insertions(+), 45 deletions(-) diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index d4f5b000..8c57b213 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -64,8 +64,8 @@ using arrow::compute::DictionaryEncode; using arrow::compute::FunctionContext; using arrow::io::BufferReader; -using arrow::test::randint; -using arrow::test::random_is_valid; +using arrow::randint; +using arrow::random_is_valid; using ArrowId = ::arrow::Type; using ParquetType = parquet::Type; @@ -376,29 +376,6 @@ void PrintColumn(const Column& col, std::stringstream* ss) { } } -void AssertTablesEqual(const Table& expected, const Table& actual, - bool same_chunk_layout = true) { - ASSERT_EQ(expected.num_columns(), actual.num_columns()); - - if (same_chunk_layout) { - for (int i = 0; i < actual.num_columns(); ++i) { - AssertChunkedEqual(*expected.column(i)->data(), *actual.column(i)->data()); - } - } else { - std::stringstream ss; - if (!actual.Equals(expected)) { - for (int i = 0; i < expected.num_columns(); ++i) { - ss << "Actual column " << i << std::endl; - PrintColumn(*actual.column(i), &ss); - - ss << "Expected column " << i << std::endl; - PrintColumn(*expected.column(i), &ss); - } - FAIL() << ss.str(); - } - } -} - void DoSimpleRoundtrip(const std::shared_ptr& table, int num_threads, int64_t row_group_size, const std::vector& column_subset, std::shared_ptr
* out, @@ -428,7 +405,7 @@ void CheckSimpleRoundtrip(const std::shared_ptr
& table, int64_t row_group default_arrow_writer_properties()) { std::shared_ptr
result; DoSimpleRoundtrip(table, 1, row_group_size, {}, &result, arrow_properties); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*table, *result, false)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result, false)); } static std::shared_ptr MakeSimpleSchema(const ::DataType& type, @@ -1273,14 +1250,14 @@ TEST(TestArrowReadWrite, DateTimeTypes) { table, 1, table->num_rows(), {}, &result, ArrowWriterProperties::Builder().enable_deprecated_int96_timestamps()->build())); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*table, *result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); // Cast nanaoseconds to microseconds and use INT64 physical type ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result)); std::shared_ptr
expected; MakeDateTimeTypesTable(&table, true); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*table, *result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); } TEST(TestArrowReadWrite, CoerceTimestamps) { @@ -1342,13 +1319,13 @@ TEST(TestArrowReadWrite, CoerceTimestamps) { input, 1, input->num_rows(), {}, &milli_result, ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MILLI)->build())); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*ex_milli_result, *milli_result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_milli_result, *milli_result)); std::shared_ptr
micro_result; ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( input, 1, input->num_rows(), {}, µ_result, ArrowWriterProperties::Builder().coerce_timestamps(TimeUnit::MICRO)->build())); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*ex_micro_result, *micro_result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_micro_result, *micro_result)); } TEST(TestArrowReadWrite, CoerceTimestampsLosePrecision) { @@ -1474,7 +1451,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { std::shared_ptr
result; ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result)); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*ex_table, *result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_table, *result)); } // Regression for ARROW-2802 @@ -1608,7 +1585,7 @@ TEST(TestArrowReadWrite, MultithreadedRead) { ASSERT_NO_FATAL_FAILURE( DoSimpleRoundtrip(table, num_threads, table->num_rows(), {}, &result)); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*table, *result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); } TEST(TestArrowReadWrite, ReadSingleRowGroup) { @@ -1719,7 +1696,7 @@ TEST(TestArrowReadWrite, ReadColumnSubset) { auto ex_schema = ::arrow::schema(ex_fields); auto expected = Table::Make(ex_schema, ex_columns); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*expected, *result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*expected, *result)); } TEST(TestArrowReadWrite, ListLargeRecords) { @@ -1745,7 +1722,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) { // Read everything std::shared_ptr
result; ASSERT_OK_NO_THROW(reader->ReadTable(&result)); - ASSERT_NO_FATAL_FAILURE(AssertTablesEqual(*table, *result)); + ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), ::arrow::default_memory_pool(), @@ -1922,7 +1899,7 @@ TEST(TestArrowReadWrite, DictionaryColumnChunkedWrite) { auto expected_table = Table::Make(schema, columns); - AssertTablesEqual(*expected_table, *result, false); + ::arrow::AssertTablesEqual(*expected_table, *result, false); } TEST(TestArrowWrite, CheckChunkSize) { diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h index 2babacb8..8e058a60 100644 --- a/src/parquet/arrow/test-util.h +++ b/src/parquet/arrow/test-util.h @@ -24,6 +24,14 @@ #include "arrow/type_traits.h" #include "arrow/util/decimal.h" +namespace arrow { +// PARQUET-1382: backwards-compatible shim for arrow::test namespace +namespace test {} + +using namespace ::arrow::test; // NOLINT + +} // namespace arrow + namespace parquet { namespace arrow { @@ -66,7 +74,7 @@ typename std::enable_if::value, Status>::type NonNullA size_t size, std::shared_ptr* out) { using c_type = typename ArrowType::c_type; std::vector values; - ::arrow::test::random_real(size, 0, static_cast(0), static_cast(1), + ::arrow::random_real(size, 0, static_cast(0), static_cast(1), &values); ::arrow::NumericBuilder builder; RETURN_NOT_OK(builder.Append(values.data(), values.size())); @@ -78,7 +86,7 @@ typename std::enable_if< is_arrow_int::value && !is_arrow_date::value, Status>::type NonNullArray(size_t size, std::shared_ptr* out) { std::vector values; - ::arrow::test::randint(size, 0, 64, &values); + ::arrow::randint(size, 0, 64, &values); // Passing data type so this will work with TimestampType too ::arrow::NumericBuilder builder(std::make_shared(), @@ -91,7 +99,7 @@ template typename std::enable_if::value, Status>::type NonNullArray( size_t size, std::shared_ptr* out) { std::vector values; - ::arrow::test::randint(size, 0, 64, &values); + ::arrow::randint(size, 0, 64, &values); for (size_t i = 0; i < size; i++) { values[i] *= 86400000; } @@ -175,7 +183,7 @@ template typename std::enable_if::value, Status>::type NonNullArray( size_t size, std::shared_ptr* out) { std::vector values; - ::arrow::test::randint(size, 0, 1, &values); + ::arrow::randint(size, 0, 1, &values); ::arrow::BooleanBuilder builder; RETURN_NOT_OK(builder.Append(values.data(), values.size())); return builder.Finish(out); @@ -187,7 +195,7 @@ typename std::enable_if::value, Status>::type Nullable size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr* out) { using c_type = typename ArrowType::c_type; std::vector values; - ::arrow::test::random_real(size, seed, static_cast(-1e10), + ::arrow::random_real(size, seed, static_cast(-1e10), static_cast(1e10), &values); std::vector valid_bytes(size, 1); @@ -209,7 +217,7 @@ NullableArray(size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr valid_bytes(size, 1); for (size_t i = 0; i < num_nulls; i++) { @@ -230,7 +238,7 @@ typename std::enable_if::value, Status>::type NullableA // Seed is random in Arrow right now (void)seed; - ::arrow::test::randint(size, 0, 64, &values); + ::arrow::randint(size, 0, 64, &values); for (size_t i = 0; i < size; i++) { values[i] *= 86400000; } @@ -268,7 +276,7 @@ NullableArray(size_t size, size_t num_nulls, uint32_t seed, if (!valid_bytes[i]) { RETURN_NOT_OK(builder.AppendNull()); } else { - ::arrow::test::random_bytes(kBufferSize, seed + static_cast(i), buffer); + ::arrow::random_bytes(kBufferSize, seed + static_cast(i), buffer); RETURN_NOT_OK(builder.Append(buffer, kBufferSize)); } } @@ -297,7 +305,7 @@ NullableArray(size_t size, size_t num_nulls, uint32_t seed, if (!valid_bytes[i]) { RETURN_NOT_OK(builder.AppendNull()); } else { - ::arrow::test::random_bytes(kBufferSize, seed + static_cast(i), buffer); + ::arrow::random_bytes(kBufferSize, seed + static_cast(i), buffer); RETURN_NOT_OK(builder.Append(buffer)); } } @@ -341,7 +349,7 @@ typename std::enable_if::value, Status>::type NullableA // Seed is random in Arrow right now (void)seed; - ::arrow::test::randint(size, 0, 1, &values); + ::arrow::randint(size, 0, 1, &values); std::vector valid_bytes(size, 1); for (size_t i = 0; i < num_nulls; i++) { From 6b1dde13cbd7102aba01b26d29da86856aade20b Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 16 Aug 2018 10:29:42 +0200 Subject: [PATCH 2/4] Bump Arrow changeset --- cmake_modules/ArrowExternalProject.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake_modules/ArrowExternalProject.cmake b/cmake_modules/ArrowExternalProject.cmake index 4f23661e..5ff22afe 100644 --- a/cmake_modules/ArrowExternalProject.cmake +++ b/cmake_modules/ArrowExternalProject.cmake @@ -46,7 +46,8 @@ if (MSVC AND PARQUET_USE_STATIC_CRT) endif() if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "") - set(ARROW_VERSION "501d60e918bd4d10c429ab34e0b8e8a87dffb732") + # This can be a tag or changeset + set(ARROW_VERSION "apache-arrow-0.10.0") else() set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}") endif() From a6a21980ce3a6daea6bcdc1ef2d7875d7c94dd93 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 16 Aug 2018 10:58:03 +0200 Subject: [PATCH 3/4] Try to silence deprecation errors on gcc / clang --- .travis.yml | 10 ++++++++-- ci/travis_script_cpp.sh | 8 ++++---- cmake_modules/SetupCxxFlags.cmake | 7 ++++--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7918b890..294c0afc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,8 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + sudo: required dist: trusty + +language: cpp + +cache: + ccache: true + addons: apt: sources: @@ -35,6 +42,7 @@ addons: - bison - flex - pkg-config + matrix: fast_finish: true include: @@ -76,8 +84,6 @@ matrix: script: - $TRAVIS_BUILD_DIR/ci/travis_script_toolchain.sh -language: cpp - # PARQUET-626: revisit llvm toolchain when/if llvm.org apt repo resurfaces # before_install: diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh index d3cef663..a7aeaca4 100755 --- a/ci/travis_script_cpp.sh +++ b/ci/travis_script_cpp.sh @@ -33,18 +33,18 @@ make lint # fi if [ $TRAVIS_OS_NAME == "linux" ]; then - make -j4 || exit 1 - ctest -VV -L unittest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; } + make -j4 + ctest -j2 --output-on-failure -L unittest # Current cpp-coveralls version 0.4 throws an error (PARQUET-1075) on Travis CI. Pin to last working version sudo pip install cpp_coveralls==0.3.12 export PARQUET_ROOT=$TRAVIS_BUILD_DIR $TRAVIS_BUILD_DIR/ci/upload_coverage.sh else - make -j4 || exit 1 + make -j4 BUILD_TYPE=debug EXECUTABLE_DIR=$CPP_BUILD_DIR/$BUILD_TYPE export LD_LIBRARY_PATH=$EXECUTABLE_DIR:$LD_LIBRARY_PATH - ctest -VV -L unittest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; } + ctest -j2 --output-on-failure -L unittest fi popd diff --git a/cmake_modules/SetupCxxFlags.cmake b/cmake_modules/SetupCxxFlags.cmake index 01ed85bf..097772eb 100644 --- a/cmake_modules/SetupCxxFlags.cmake +++ b/cmake_modules/SetupCxxFlags.cmake @@ -79,8 +79,8 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /WX") elseif ("${COMPILER_FAMILY}" STREQUAL "clang") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything -Wno-c++98-compat \ --Wno-c++98-compat-pedantic -Wno-deprecated -Wno-weak-vtables -Wno-padded \ --Wno-comma -Wno-unused-parameter -Wno-undef \ +-Wno-c++98-compat-pedantic -Wno-deprecated -Wno-deprecated-declarations \ +-Wno-weak-vtables -Wno-padded -Wno-comma -Wno-unused-parameter -Wno-undef \ -Wno-shadow -Wno-switch-enum -Wno-exit-time-destructors \ -Wno-global-constructors -Wno-weak-template-vtables -Wno-undefined-reinterpret-cast \ -Wno-implicit-fallthrough -Wno-unreachable-code-return \ @@ -117,7 +117,8 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") # Treat all compiler warnings as errors set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option -Werror") elseif ("${COMPILER_FAMILY}" STREQUAL "gcc") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wconversion -Wno-sign-conversion") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall \ +-Wno-deprecated-declarations -Wconversion -Wno-sign-conversion") # Treat all compiler warnings as errors set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror") else() From 6760214ad7ab3a8c534d8bbf32f453e730fd9e6f Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 16 Aug 2018 11:24:53 +0200 Subject: [PATCH 4/4] Try to silence deprecation warnings --- ci/travis_script_static.sh | 5 +++-- ci/travis_script_toolchain.sh | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ci/travis_script_static.sh b/ci/travis_script_static.sh index b76ced8f..5c15ec86 100755 --- a/ci/travis_script_static.sh +++ b/ci/travis_script_static.sh @@ -66,6 +66,7 @@ export LZ4_STATIC_LIB=$ARROW_EP/lz4_ep-prefix/src/lz4_ep/lib/liblz4.a export ZSTD_STATIC_LIB=$ARROW_EP/zstd_ep-prefix/src/zstd_ep/lib/libzstd.a cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \ + -DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \ -DPARQUET_TEST_MEMCHECK=ON \ -DPARQUET_ARROW_LINKAGE="static" \ -DPARQUET_BUILD_SHARED=OFF \ @@ -78,7 +79,7 @@ cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \ pushd $CPP_BUILD_DIR -make -j4 VERBOSE=1 || exit 1 -ctest -VV -L unittest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; } +make -j4 VERBOSE=1 +ctest -j2 --output-on-failure -L unittest popd diff --git a/ci/travis_script_toolchain.sh b/ci/travis_script_toolchain.sh index 036ab3c2..3cbcd34b 100755 --- a/ci/travis_script_toolchain.sh +++ b/ci/travis_script_toolchain.sh @@ -52,6 +52,7 @@ export LD_LIBRARY_PATH=$CPP_TOOLCHAIN/lib:$LD_LIBRARY_PATH export BOOST_ROOT=$CPP_TOOLCHAIN cmake -DPARQUET_CXXFLAGS=-Werror \ + -DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \ -DPARQUET_TEST_MEMCHECK=ON \ -DPARQUET_GENERATE_COVERAGE=1 \ -DCMAKE_INSTALL_PREFIX=$CPP_TOOLCHAIN \ @@ -59,9 +60,9 @@ cmake -DPARQUET_CXXFLAGS=-Werror \ pushd $CPP_BUILD_DIR -make -j4 || exit 1 -make install || exit 1 -ctest -VV -L unittest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; } +make -j4 +make install +ctest -j2 --output-on-failure -L unittest popd