From 0073b5657479a80ad2febff4d3ebf02461e51497 Mon Sep 17 00:00:00 2001 From: "Korn, Uwe" Date: Thu, 26 Jul 2018 10:29:07 +0200 Subject: [PATCH 1/3] PARQUET-1357: FormatStatValue truncates binary statistics on zero character --- src/parquet/printer.cc | 8 ++++---- src/parquet/types-test.cc | 22 ++++++++++++++++++++++ src/parquet/types.cc | 35 +++++++++++++++++++++++++++++++++++ src/parquet/types.h | 3 +++ 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/parquet/printer.cc b/src/parquet/printer.cc index 88b55289..38e9703d 100644 --- a/src/parquet/printer.cc +++ b/src/parquet/printer.cc @@ -84,8 +84,8 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list selecte std::string min = stats->EncodeMin(), max = stats->EncodeMax(); stream << ", Null Values: " << stats->null_count() << ", Distinct Values: " << stats->distinct_count() << std::endl - << " Max: " << FormatStatValue(descr->physical_type(), max.c_str()) - << ", Min: " << FormatStatValue(descr->physical_type(), min.c_str()); + << " Max: " << FormatStatValue(descr->physical_type(), max) + << ", Min: " << FormatStatValue(descr->physical_type(), min); } else { stream << " Statistics Not Set"; } @@ -207,9 +207,9 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected std::string min = stats->EncodeMin(), max = stats->EncodeMax(); stream << "\"NumNulls\": \"" << stats->null_count() << "\", " << "\"DistinctValues\": \"" << stats->distinct_count() << "\", " - << "\"Max\": \"" << FormatStatValue(descr->physical_type(), max.c_str()) + << "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) << "\", " - << "\"Min\": \"" << FormatStatValue(descr->physical_type(), min.c_str()) + << "\"Min\": \"" << FormatStatValue(descr->physical_type(), min) << "\" },"; } else { stream << "\"False\","; diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc index 4e759827..987f36f9 100644 --- a/src/parquet/types-test.cc +++ b/src/parquet/types-test.cc @@ -68,46 +68,68 @@ TEST(TypePrinter, StatisticsTypes) { int32_t int_max = 2048; smin = std::string(reinterpret_cast(&int_min), sizeof(int32_t)); smax = std::string(reinterpret_cast(&int_max), sizeof(int32_t)); + ASSERT_STREQ("1024", FormatStatValue(Type::INT32, smin).c_str()); ASSERT_STREQ("1024", FormatStatValue(Type::INT32, smin.c_str()).c_str()); + ASSERT_STREQ("2048", FormatStatValue(Type::INT32, smax).c_str()); ASSERT_STREQ("2048", FormatStatValue(Type::INT32, smax.c_str()).c_str()); int64_t int64_min = 10240000000000; int64_t int64_max = 20480000000000; smin = std::string(reinterpret_cast(&int64_min), sizeof(int64_t)); smax = std::string(reinterpret_cast(&int64_max), sizeof(int64_t)); + ASSERT_STREQ("10240000000000", FormatStatValue(Type::INT64, smin).c_str()); ASSERT_STREQ("10240000000000", FormatStatValue(Type::INT64, smin.c_str()).c_str()); + ASSERT_STREQ("20480000000000", FormatStatValue(Type::INT64, smax).c_str()); ASSERT_STREQ("20480000000000", FormatStatValue(Type::INT64, smax.c_str()).c_str()); float float_min = 1.024f; float float_max = 2.048f; smin = std::string(reinterpret_cast(&float_min), sizeof(float)); smax = std::string(reinterpret_cast(&float_max), sizeof(float)); + ASSERT_STREQ("1.024", FormatStatValue(Type::FLOAT, smin).c_str()); ASSERT_STREQ("1.024", FormatStatValue(Type::FLOAT, smin.c_str()).c_str()); + ASSERT_STREQ("2.048", FormatStatValue(Type::FLOAT, smax).c_str()); ASSERT_STREQ("2.048", FormatStatValue(Type::FLOAT, smax.c_str()).c_str()); double double_min = 1.0245; double double_max = 2.0489; smin = std::string(reinterpret_cast(&double_min), sizeof(double)); smax = std::string(reinterpret_cast(&double_max), sizeof(double)); + ASSERT_STREQ("1.0245", FormatStatValue(Type::DOUBLE, smin).c_str()); ASSERT_STREQ("1.0245", FormatStatValue(Type::DOUBLE, smin.c_str()).c_str()); + ASSERT_STREQ("2.0489", FormatStatValue(Type::DOUBLE, smax).c_str()); ASSERT_STREQ("2.0489", FormatStatValue(Type::DOUBLE, smax.c_str()).c_str()); Int96 Int96_min = {{1024, 2048, 4096}}; Int96 Int96_max = {{2048, 4096, 8192}}; smin = std::string(reinterpret_cast(&Int96_min), sizeof(Int96)); smax = std::string(reinterpret_cast(&Int96_max), sizeof(Int96)); + ASSERT_STREQ("1024 2048 4096", FormatStatValue(Type::INT96, smin).c_str()); ASSERT_STREQ("1024 2048 4096", FormatStatValue(Type::INT96, smin.c_str()).c_str()); + ASSERT_STREQ("2048 4096 8192", FormatStatValue(Type::INT96, smax).c_str()); ASSERT_STREQ("2048 4096 8192", FormatStatValue(Type::INT96, smax.c_str()).c_str()); smin = std::string("abcdef"); smax = std::string("ijklmnop"); + ASSERT_STREQ("abcdef", FormatStatValue(Type::BYTE_ARRAY, smin).c_str()); ASSERT_STREQ("abcdef", FormatStatValue(Type::BYTE_ARRAY, smin.c_str()).c_str()); + ASSERT_STREQ("ijklmnop", FormatStatValue(Type::BYTE_ARRAY, smax).c_str()); ASSERT_STREQ("ijklmnop", FormatStatValue(Type::BYTE_ARRAY, smax.c_str()).c_str()); + // PARQUET-1357: FormatStatValue truncates binary statistics on zero character + smax.push_back('\0'); + ASSERT_EQ(smax, FormatStatValue(Type::BYTE_ARRAY, smax)); + // This fails, thus the call to FormatStatValue(.., const char*) was deprecated. + // ASSERT_EQ(smax, FormatStatValue(Type::BYTE_ARRAY, smax.c_str())); + smin = std::string("abcdefgh"); smax = std::string("ijklmnop"); + ASSERT_STREQ("abcdefgh", + FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin).c_str()); ASSERT_STREQ("abcdefgh", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str()); + ASSERT_STREQ("ijklmnop", + FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax).c_str()); ASSERT_STREQ("ijklmnop", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str()); } diff --git a/src/parquet/types.cc b/src/parquet/types.cc index 79bc5d1a..31209631 100644 --- a/src/parquet/types.cc +++ b/src/parquet/types.cc @@ -24,6 +24,41 @@ namespace parquet { +std::string FormatStatValue(Type::type parquet_type, const std::string& val) { + std::stringstream result; + switch (parquet_type) { + case Type::BOOLEAN: + result << reinterpret_cast(val.c_str())[0]; + break; + case Type::INT32: + result << reinterpret_cast(val.c_str())[0]; + break; + case Type::INT64: + result << reinterpret_cast(val.c_str())[0]; + break; + case Type::DOUBLE: + result << reinterpret_cast(val.c_str())[0]; + break; + case Type::FLOAT: + result << reinterpret_cast(val.c_str())[0]; + break; + case Type::INT96: { + auto const i32_val = reinterpret_cast(val.c_str()); + result << i32_val[0] << " " << i32_val[1] << " " << i32_val[2]; + break; + } + case Type::BYTE_ARRAY: { + return val; + } + case Type::FIXED_LEN_BYTE_ARRAY: { + return val; + } + default: + break; + } + return result.str(); +} + std::string FormatStatValue(Type::type parquet_type, const char* val) { std::stringstream result; switch (parquet_type) { diff --git a/src/parquet/types.h b/src/parquet/types.h index 04cfc4b5..5a51288c 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -292,6 +292,9 @@ PARQUET_EXPORT std::string LogicalTypeToString(LogicalType::type t); PARQUET_EXPORT std::string TypeToString(Type::type t); +PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, const std::string& val); + +/// \deprecated Since 1.5.0 PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, const char* val); PARQUET_EXPORT int GetTypeByteSize(Type::type t); From e0e4946c7cba698f0f54cea33d2293bf9e93e93f Mon Sep 17 00:00:00 2001 From: "Korn, Uwe" Date: Thu, 26 Jul 2018 11:23:24 +0200 Subject: [PATCH 2/3] Update to clang-format 6.0 --- CMakeLists.txt | 2 +- cmake_modules/FindClangTools.cmake | 26 +++++++++++++++---- src/parquet/arrow/arrow-reader-writer-test.cc | 8 +++--- src/parquet/arrow/arrow-schema-test.cc | 11 ++++---- src/parquet/arrow/reader.cc | 4 +-- src/parquet/arrow/test-util.h | 13 +++++----- src/parquet/arrow/writer.cc | 10 ++++--- src/parquet/encoding-benchmark.cc | 2 +- src/parquet/encoding-test.cc | 2 +- src/parquet/encoding.h | 8 +++--- src/parquet/printer.cc | 3 +-- src/parquet/statistics-test.cc | 7 ++--- src/parquet/statistics.cc | 2 +- src/parquet/types-test.cc | 6 ++--- src/parquet/types.h | 3 ++- src/parquet/util/memory-test.cc | 6 ++--- src/parquet/util/memory.cc | 7 ++--- src/parquet/util/memory.h | 2 +- 18 files changed, 67 insertions(+), 55 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b53f5980..927f7289 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,7 @@ enable_testing() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules") set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") -set(CLANG_FORMAT_VERSION "5.0") +set(CLANG_FORMAT_VERSION "6.0") find_package(ClangTools) if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR CLANG_TIDY_FOUND) # Generate a Clang compile_commands.json "compilation database" file for use diff --git a/cmake_modules/FindClangTools.cmake b/cmake_modules/FindClangTools.cmake index e9221ff2..215a5cd9 100644 --- a/cmake_modules/FindClangTools.cmake +++ b/cmake_modules/FindClangTools.cmake @@ -30,6 +30,12 @@ # CLANG_FORMAT_BIN, The path to the clang format binary # CLANG_TIDY_FOUND, Whether clang format was found +if (DEFINED ENV{HOMEBREW_PREFIX}) + set(HOMEBREW_PREFIX "${ENV{HOMEBREW_PREFIX}") +else() + set(HOMEBREW_PREFIX "/usr/local") +endif() + find_program(CLANG_TIDY_BIN NAMES clang-tidy-4.0 clang-tidy-3.9 @@ -37,7 +43,7 @@ find_program(CLANG_TIDY_BIN clang-tidy-3.7 clang-tidy-3.6 clang-tidy - PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin + PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin" NO_DEFAULT_PATH ) @@ -55,7 +61,7 @@ if (CLANG_FORMAT_VERSION) PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} - /usr/local/bin /usr/bin + /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin" NO_DEFAULT_PATH ) @@ -67,16 +73,26 @@ if (CLANG_FORMAT_VERSION) if ("${CLANG_FORMAT_MINOR_VERSION}" STREQUAL "0") find_program(CLANG_FORMAT_BIN NAMES clang-format - PATHS /usr/local/opt/llvm@${CLANG_FORMAT_MAJOR_VERSION}/bin + PATHS "${HOMEBREW_PREFIX}/opt/llvm@${CLANG_FORMAT_MAJOR_VERSION}/bin" NO_DEFAULT_PATH ) else() find_program(CLANG_FORMAT_BIN NAMES clang-format - PATHS /usr/local/opt/llvm@${CLANG_FORMAT_VERSION}/bin + PATHS "${HOMEBREW_PREFIX}/opt/llvm@${CLANG_FORMAT_VERSION}/bin" NO_DEFAULT_PATH ) endif() + + if ("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND") + # binary was still not found, look into Cellar + file(GLOB CLANG_FORMAT_PATH "${HOMEBREW_PREFIX}/Cellar/llvm/${CLANG_FORMAT_VERSION}.*") + find_program(CLANG_FORMAT_BIN + NAMES clang-format + PATHS "${CLANG_FORMAT_PATH}/bin" + NO_DEFAULT_PATH + ) + endif() endif() else() find_program(CLANG_FORMAT_BIN @@ -86,7 +102,7 @@ else() clang-format-3.7 clang-format-3.6 clang-format - PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin + PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin" NO_DEFAULT_PATH ) endif() diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index 1c2f3225..8955b0ab 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -52,16 +52,16 @@ using arrow::Buffer; using arrow::ChunkedArray; using arrow::Column; using arrow::DataType; +using arrow::default_memory_pool; using arrow::ListArray; -using arrow::ResizableBuffer; using arrow::PrimitiveArray; +using arrow::ResizableBuffer; using arrow::Status; using arrow::Table; using arrow::TimeUnit; using arrow::compute::Datum; using arrow::compute::DictionaryEncode; using arrow::compute::FunctionContext; -using arrow::default_memory_pool; using arrow::io::BufferReader; using arrow::test::randint; @@ -1453,13 +1453,13 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { // Regression for ARROW-2802 TEST(TestArrowReadWrite, CoerceTimestampsAndSupportDeprecatedInt96) { using ::arrow::Column; + using ::arrow::default_memory_pool; using ::arrow::Field; using ::arrow::Schema; using ::arrow::Table; - using ::arrow::TimeUnit; using ::arrow::TimestampBuilder; using ::arrow::TimestampType; - using ::arrow::default_memory_pool; + using ::arrow::TimeUnit; auto timestamp_type = std::make_shared(TimeUnit::NANO); diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc index 5c16c044..cb2b8508 100644 --- a/src/parquet/arrow/arrow-schema-test.cc +++ b/src/parquet/arrow/arrow-schema-test.cc @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test { for (int i = 0; i < expected_schema->num_fields(); ++i) { auto lhs = result_schema_->field(i); auto rhs = expected_schema->field(i); - EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString() - << " != " << rhs->ToString(); + EXPECT_TRUE(lhs->Equals(rhs)) + << i << " " << lhs->ToString() << " != " << rhs->ToString(); } } @@ -607,9 +607,10 @@ TEST_F(TestConvertParquetSchema, ParquetRepeatedNestedSchema) { auto inner_group_type = std::make_shared<::arrow::StructType>(inner_group_fields); auto outer_group_fields = { std::make_shared("leaf2", INT32, true), - std::make_shared("innerGroup", ::arrow::list(std::make_shared( - "innerGroup", inner_group_type, false)), - false)}; + std::make_shared( + "innerGroup", + ::arrow::list(std::make_shared("innerGroup", inner_group_type, false)), + false)}; auto outer_group_type = std::make_shared<::arrow::StructType>(outer_group_fields); arrow_fields.push_back(std::make_shared("leaf1", INT32, true)); diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index 0f671b71..c0974ca4 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -301,9 +301,7 @@ class PARQUET_NO_EXPORT StructImpl : public ColumnReader::ColumnReaderImpl { public: explicit StructImpl(const std::vector>& children, int16_t struct_def_level, MemoryPool* pool, const Node* node) - : children_(children), - struct_def_level_(struct_def_level), - pool_(pool) { + : children_(children), struct_def_level_(struct_def_level), pool_(pool) { InitField(node, children); } diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h index 4db98b77..bfc78c87 100644 --- a/src/parquet/arrow/test-util.h +++ b/src/parquet/arrow/test-util.h @@ -402,17 +402,16 @@ Status MakeEmptyListsArray(int64_t size, std::shared_ptr* out_array) { &offsets_buffer)); memset(offsets_buffer->mutable_data(), 0, offsets_nbytes); - auto value_field = ::arrow::field("item", ::arrow::float64(), - false /* nullable_values */); + auto value_field = + ::arrow::field("item", ::arrow::float64(), false /* nullable_values */); auto list_type = ::arrow::list(value_field); std::vector> child_buffers = {nullptr /* null bitmap */, - nullptr /* values */ }; - auto child_data = ::arrow::ArrayData::Make(value_field->type(), 0, - std::move(child_buffers)); + nullptr /* values */}; + auto child_data = + ::arrow::ArrayData::Make(value_field->type(), 0, std::move(child_buffers)); - std::vector> buffers = {nullptr /* bitmap */, - offsets_buffer }; + std::vector> buffers = {nullptr /* bitmap */, offsets_buffer}; auto array_data = ::arrow::ArrayData::Make(list_type, size, std::move(buffers)); array_data->child_data.push_back(child_data); diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index f3ddda90..4bfeb370 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -41,8 +41,8 @@ using arrow::Int16Builder; using arrow::ListArray; using arrow::MemoryPool; using arrow::NumericArray; -using arrow::ResizableBuffer; using arrow::PrimitiveArray; +using arrow::ResizableBuffer; using arrow::Status; using arrow::Table; using arrow::TimeUnit; @@ -216,9 +216,11 @@ class LevelBuilder { if (level_null_count && level_valid_bitmap == nullptr) { // Special case: this is a null array (all elements are null) RETURN_NOT_OK(def_levels_.Append(static_cast(def_level + 1))); - } else if (nullable_level && ((level_null_count == 0) || - BitUtil::GetBit(level_valid_bitmap, - inner_offset + i + array_offsets_[recursion_level]))) { + } else if (nullable_level && + ((level_null_count == 0) || + BitUtil::GetBit( + level_valid_bitmap, + inner_offset + i + array_offsets_[recursion_level]))) { // Non-null element in a null level RETURN_NOT_OK(def_levels_.Append(static_cast(def_level + 2))); } else { diff --git a/src/parquet/encoding-benchmark.cc b/src/parquet/encoding-benchmark.cc index 5ea8f8f5..364cdba1 100644 --- a/src/parquet/encoding-benchmark.cc +++ b/src/parquet/encoding-benchmark.cc @@ -20,8 +20,8 @@ #include "parquet/encoding-internal.h" #include "parquet/util/memory.h" -using arrow::MemoryPool; using arrow::default_memory_pool; +using arrow::MemoryPool; namespace parquet { diff --git a/src/parquet/encoding-test.cc b/src/parquet/encoding-test.cc index 31bb79d0..60285ab2 100644 --- a/src/parquet/encoding-test.cc +++ b/src/parquet/encoding-test.cc @@ -30,8 +30,8 @@ #include "parquet/util/memory.h" #include "parquet/util/test-common.h" -using arrow::MemoryPool; using arrow::default_memory_pool; +using arrow::MemoryPool; using std::string; using std::vector; diff --git a/src/parquet/encoding.h b/src/parquet/encoding.h index 2742937c..006f22f2 100644 --- a/src/parquet/encoding.h +++ b/src/parquet/encoding.h @@ -51,12 +51,12 @@ class Encoder { virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, int64_t valid_bits_offset) { std::shared_ptr buffer; - auto status = ::arrow::AllocateResizableBuffer(pool_, num_values * sizeof(T), - &buffer); + auto status = + ::arrow::AllocateResizableBuffer(pool_, num_values * sizeof(T), &buffer); if (!status.ok()) { std::ostringstream ss; - ss << "AllocateResizableBuffer failed in Encoder.PutSpaced in " - << __FILE__ << ", on line " << __LINE__; + ss << "AllocateResizableBuffer failed in Encoder.PutSpaced in " << __FILE__ + << ", on line " << __LINE__; throw ParquetException(ss.str()); } int32_t num_valid_values = 0; diff --git a/src/parquet/printer.cc b/src/parquet/printer.cc index 38e9703d..3f18a5c8 100644 --- a/src/parquet/printer.cc +++ b/src/parquet/printer.cc @@ -207,8 +207,7 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list selected std::string min = stats->EncodeMin(), max = stats->EncodeMax(); stream << "\"NumNulls\": \"" << stats->null_count() << "\", " << "\"DistinctValues\": \"" << stats->distinct_count() << "\", " - << "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) - << "\", " + << "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) << "\", " << "\"Min\": \"" << FormatStatValue(descr->physical_type(), min) << "\" },"; } else { diff --git a/src/parquet/statistics-test.cc b/src/parquet/statistics-test.cc index bf3d1968..943d5ccf 100644 --- a/src/parquet/statistics-test.cc +++ b/src/parquet/statistics-test.cc @@ -36,8 +36,8 @@ #include "parquet/types.h" #include "parquet/util/memory.h" -using arrow::MemoryPool; using arrow::default_memory_pool; +using arrow::MemoryPool; namespace parquet { @@ -194,8 +194,9 @@ bool* TestRowGroupStatistics::GetValuesPointer(std::vector& v } template -typename std::vector TestRowGroupStatistics< - TestType>::GetDeepCopy(const std::vector& values) { +typename std::vector +TestRowGroupStatistics::GetDeepCopy( + const std::vector& values) { return values; } diff --git a/src/parquet/statistics.cc b/src/parquet/statistics.cc index 5b014edc..ea7f783b 100644 --- a/src/parquet/statistics.cc +++ b/src/parquet/statistics.cc @@ -24,8 +24,8 @@ #include "parquet/statistics.h" #include "parquet/util/memory.h" -using arrow::MemoryPool; using arrow::default_memory_pool; +using arrow::MemoryPool; namespace parquet { diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc index 987f36f9..dc2112c5 100644 --- a/src/parquet/types-test.cc +++ b/src/parquet/types-test.cc @@ -124,12 +124,10 @@ TEST(TypePrinter, StatisticsTypes) { smin = std::string("abcdefgh"); smax = std::string("ijklmnop"); - ASSERT_STREQ("abcdefgh", - FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin).c_str()); + ASSERT_STREQ("abcdefgh", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin).c_str()); ASSERT_STREQ("abcdefgh", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str()); - ASSERT_STREQ("ijklmnop", - FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax).c_str()); + ASSERT_STREQ("ijklmnop", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax).c_str()); ASSERT_STREQ("ijklmnop", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str()); } diff --git a/src/parquet/types.h b/src/parquet/types.h index 5a51288c..3d033455 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -292,7 +292,8 @@ PARQUET_EXPORT std::string LogicalTypeToString(LogicalType::type t); PARQUET_EXPORT std::string TypeToString(Type::type t); -PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, const std::string& val); +PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, + const std::string& val); /// \deprecated Since 1.5.0 PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, const char* val); diff --git a/src/parquet/util/memory-test.cc b/src/parquet/util/memory-test.cc index cb8c7061..bfd685db 100644 --- a/src/parquet/util/memory-test.cc +++ b/src/parquet/util/memory-test.cc @@ -27,8 +27,8 @@ #include "parquet/util/memory.h" #include "parquet/util/test-common.h" -using arrow::MemoryPool; using arrow::default_memory_pool; +using arrow::MemoryPool; namespace parquet { @@ -255,8 +255,8 @@ TEST(TestBufferedInputStream, Basics) { int64_t stream_offset = 10; int64_t stream_size = source_size - stream_offset; int64_t chunk_size = 50; - std::shared_ptr buf = AllocateBuffer(default_memory_pool(), - source_size); + std::shared_ptr buf = + AllocateBuffer(default_memory_pool(), source_size); ASSERT_EQ(source_size, buf->size()); for (int i = 0; i < source_size; i++) { buf->mutable_data()[i] = static_cast(i); diff --git a/src/parquet/util/memory.cc b/src/parquet/util/memory.cc index df7ccc76..d9caf6e3 100644 --- a/src/parquet/util/memory.cc +++ b/src/parquet/util/memory.cc @@ -36,9 +36,7 @@ namespace parquet { template Vector::Vector(int64_t size, MemoryPool* pool) - : buffer_(AllocateBuffer(pool, size * sizeof(T))), - size_(size), - capacity_(size) { + : buffer_(AllocateBuffer(pool, size * sizeof(T))), size_(size), capacity_(size) { if (size > 0) { data_ = reinterpret_cast(buffer_->mutable_data()); } else { @@ -497,8 +495,7 @@ void BufferedInputStream::Advance(int64_t num_bytes) { std::shared_ptr AllocateBuffer(MemoryPool* pool, int64_t size) { std::shared_ptr result; - PARQUET_THROW_NOT_OK(arrow::AllocateResizableBuffer(pool, size, - &result)); + PARQUET_THROW_NOT_OK(arrow::AllocateResizableBuffer(pool, size, &result)); return result; } diff --git a/src/parquet/util/memory.h b/src/parquet/util/memory.h index 69dcebf4..088f86fe 100644 --- a/src/parquet/util/memory.h +++ b/src/parquet/util/memory.h @@ -438,7 +438,7 @@ class PARQUET_EXPORT BufferedInputStream : public InputStream { }; std::shared_ptr PARQUET_EXPORT AllocateBuffer( - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), int64_t size = 0); + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), int64_t size = 0); } // namespace parquet From 07ffc738c003ef44b47526c442cffc5c01905bcd Mon Sep 17 00:00:00 2001 From: "Korn, Uwe" Date: Thu, 26 Jul 2018 13:49:18 +0200 Subject: [PATCH 3/3] Add macro for deprecations --- src/parquet/types-test.cc | 3 +++ src/parquet/types.h | 2 ++ src/parquet/util/macros.h | 15 +++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc index dc2112c5..a1d56796 100644 --- a/src/parquet/types-test.cc +++ b/src/parquet/types-test.cc @@ -62,6 +62,8 @@ TEST(TestLogicalTypeToString, LogicalTypes) { } TEST(TypePrinter, StatisticsTypes) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" std::string smin; std::string smax; int32_t int_min = 1024; @@ -130,6 +132,7 @@ TEST(TypePrinter, StatisticsTypes) { ASSERT_STREQ("ijklmnop", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax).c_str()); ASSERT_STREQ("ijklmnop", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str()); +#pragma GCC diagnostic pop } } // namespace parquet diff --git a/src/parquet/types.h b/src/parquet/types.h index 3d033455..0f4cfc21 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -27,6 +27,7 @@ #include "arrow/util/macros.h" +#include "parquet/util/macros.h" #include "parquet/util/visibility.h" namespace parquet { @@ -296,6 +297,7 @@ PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, const std::string& val); /// \deprecated Since 1.5.0 +PARQUET_DEPRECATED("Use std::string instead of char* as input") PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, const char* val); PARQUET_EXPORT int GetTypeByteSize(Type::type t); diff --git a/src/parquet/util/macros.h b/src/parquet/util/macros.h index 0d172b13..c28b2fa6 100644 --- a/src/parquet/util/macros.h +++ b/src/parquet/util/macros.h @@ -68,4 +68,19 @@ #define FRIEND_TEST(test_case_name, test_name) \ friend class test_case_name##_##test_name##_Test +// clang-format off +// [[deprecated]] is only available in C++14, use this for the time being +// This macro takes an optional deprecation message +#if __cplusplus <= 201103L +# ifdef __GNUC__ +# define PARQUET_DEPRECATED(...) __attribute__((deprecated(__VA_ARGS__))) +# elif defined(_MSC_VER) +# define PARQUET_DEPRECATED(...) __declspec(deprecated(__VA_ARGS__)) +# else +# define PARQUET_DEPRECATED(...) +# endif +#else +# define PARQUET_DEPRECATED(...) [[deprecated(__VA_ARGS__)]] +#endif + #endif // PARQUET_UTIL_MACROS_H