Skip to content
This repository has been archived by the owner on May 10, 2024. It is now read-only.

Commit

Permalink
Merge 07ffc73 into c246da9
Browse files Browse the repository at this point in the history
  • Loading branch information
xhochy committed Jul 26, 2018
2 parents c246da9 + 07ffc73 commit 8cf2568
Show file tree
Hide file tree
Showing 20 changed files with 145 additions and 53 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ enable_testing()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")

set(CLANG_FORMAT_VERSION "5.0")
set(CLANG_FORMAT_VERSION "6.0")
find_package(ClangTools)
if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR CLANG_TIDY_FOUND)
# Generate a Clang compile_commands.json "compilation database" file for use
Expand Down
26 changes: 21 additions & 5 deletions cmake_modules/FindClangTools.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,20 @@
# CLANG_FORMAT_BIN, The path to the clang format binary
# CLANG_TIDY_FOUND, Whether clang tidy was found

# Determine the Homebrew installation root used by the search paths below.
# The HOMEBREW_PREFIX environment variable wins when it is defined; otherwise
# fall back to /usr/local.
if (DEFINED ENV{HOMEBREW_PREFIX})
  # Environment variables are dereferenced with $ENV{NAME}. The previous
  # spelling, ${ENV{NAME}, has unbalanced braces and is not a valid reference.
  set(HOMEBREW_PREFIX "$ENV{HOMEBREW_PREFIX}")
else()
  set(HOMEBREW_PREFIX "/usr/local")
endif()

find_program(CLANG_TIDY_BIN
NAMES clang-tidy-4.0
clang-tidy-3.9
clang-tidy-3.8
clang-tidy-3.7
clang-tidy-3.6
clang-tidy
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin"
NO_DEFAULT_PATH
)

Expand All @@ -55,7 +61,7 @@ if (CLANG_FORMAT_VERSION)
PATHS
${ClangTools_PATH}
$ENV{CLANG_TOOLS_PATH}
/usr/local/bin /usr/bin
/usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin"
NO_DEFAULT_PATH
)

Expand All @@ -67,16 +73,26 @@ if (CLANG_FORMAT_VERSION)
if ("${CLANG_FORMAT_MINOR_VERSION}" STREQUAL "0")
find_program(CLANG_FORMAT_BIN
NAMES clang-format
PATHS /usr/local/opt/llvm@${CLANG_FORMAT_MAJOR_VERSION}/bin
PATHS "${HOMEBREW_PREFIX}/opt/llvm@${CLANG_FORMAT_MAJOR_VERSION}/bin"
NO_DEFAULT_PATH
)
else()
find_program(CLANG_FORMAT_BIN
NAMES clang-format
PATHS /usr/local/opt/llvm@${CLANG_FORMAT_VERSION}/bin
PATHS "${HOMEBREW_PREFIX}/opt/llvm@${CLANG_FORMAT_VERSION}/bin"
NO_DEFAULT_PATH
)
endif()

if ("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND")
  # binary was still not found, look into Cellar
  file(GLOB CLANG_FORMAT_PATH "${HOMEBREW_PREFIX}/Cellar/llvm/${CLANG_FORMAT_VERSION}.*")

  # file(GLOB) yields a list (zero, one or several matching directories).
  # Append "/bin" to every entry individually: expanding the whole list inside
  # a single quoted "${CLANG_FORMAT_PATH}/bin" would only suffix the last
  # entry, and would degrade to plain "/bin" when nothing matched.
  set(CLANG_FORMAT_CELLAR_BIN_DIRS)
  foreach(CLANG_FORMAT_CELLAR_DIR ${CLANG_FORMAT_PATH})
    list(APPEND CLANG_FORMAT_CELLAR_BIN_DIRS "${CLANG_FORMAT_CELLAR_DIR}/bin")
  endforeach()

  # Only search when the Cellar actually contains a matching llvm version.
  if (CLANG_FORMAT_CELLAR_BIN_DIRS)
    find_program(CLANG_FORMAT_BIN
      NAMES clang-format
      PATHS ${CLANG_FORMAT_CELLAR_BIN_DIRS}
      NO_DEFAULT_PATH
    )
  endif()
endif()
endif()
else()
find_program(CLANG_FORMAT_BIN
Expand All @@ -86,7 +102,7 @@ else()
clang-format-3.7
clang-format-3.6
clang-format
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin"
NO_DEFAULT_PATH
)
endif()
Expand Down
8 changes: 4 additions & 4 deletions src/parquet/arrow/arrow-reader-writer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,16 @@ using arrow::Buffer;
using arrow::ChunkedArray;
using arrow::Column;
using arrow::DataType;
using arrow::default_memory_pool;
using arrow::ListArray;
using arrow::ResizableBuffer;
using arrow::PrimitiveArray;
using arrow::ResizableBuffer;
using arrow::Status;
using arrow::Table;
using arrow::TimeUnit;
using arrow::compute::Datum;
using arrow::compute::DictionaryEncode;
using arrow::compute::FunctionContext;
using arrow::default_memory_pool;
using arrow::io::BufferReader;

using arrow::test::randint;
Expand Down Expand Up @@ -1453,13 +1453,13 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {
// Regression for ARROW-2802
TEST(TestArrowReadWrite, CoerceTimestampsAndSupportDeprecatedInt96) {
using ::arrow::Column;
using ::arrow::default_memory_pool;
using ::arrow::Field;
using ::arrow::Schema;
using ::arrow::Table;
using ::arrow::TimeUnit;
using ::arrow::TimestampBuilder;
using ::arrow::TimestampType;
using ::arrow::default_memory_pool;
using ::arrow::TimeUnit;

auto timestamp_type = std::make_shared<TimestampType>(TimeUnit::NANO);

Expand Down
11 changes: 6 additions & 5 deletions src/parquet/arrow/arrow-schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test {
for (int i = 0; i < expected_schema->num_fields(); ++i) {
auto lhs = result_schema_->field(i);
auto rhs = expected_schema->field(i);
EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString()
<< " != " << rhs->ToString();
EXPECT_TRUE(lhs->Equals(rhs))
<< i << " " << lhs->ToString() << " != " << rhs->ToString();
}
}

Expand Down Expand Up @@ -607,9 +607,10 @@ TEST_F(TestConvertParquetSchema, ParquetRepeatedNestedSchema) {
auto inner_group_type = std::make_shared<::arrow::StructType>(inner_group_fields);
auto outer_group_fields = {
std::make_shared<Field>("leaf2", INT32, true),
std::make_shared<Field>("innerGroup", ::arrow::list(std::make_shared<Field>(
"innerGroup", inner_group_type, false)),
false)};
std::make_shared<Field>(
"innerGroup",
::arrow::list(std::make_shared<Field>("innerGroup", inner_group_type, false)),
false)};
auto outer_group_type = std::make_shared<::arrow::StructType>(outer_group_fields);

arrow_fields.push_back(std::make_shared<Field>("leaf1", INT32, true));
Expand Down
4 changes: 1 addition & 3 deletions src/parquet/arrow/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,7 @@ class PARQUET_NO_EXPORT StructImpl : public ColumnReader::ColumnReaderImpl {
public:
explicit StructImpl(const std::vector<std::shared_ptr<ColumnReaderImpl>>& children,
int16_t struct_def_level, MemoryPool* pool, const Node* node)
: children_(children),
struct_def_level_(struct_def_level),
pool_(pool) {
: children_(children), struct_def_level_(struct_def_level), pool_(pool) {
InitField(node, children);
}

Expand Down
13 changes: 6 additions & 7 deletions src/parquet/arrow/test-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,17 +402,16 @@ Status MakeEmptyListsArray(int64_t size, std::shared_ptr<Array>* out_array) {
&offsets_buffer));
memset(offsets_buffer->mutable_data(), 0, offsets_nbytes);

auto value_field = ::arrow::field("item", ::arrow::float64(),
false /* nullable_values */);
auto value_field =
::arrow::field("item", ::arrow::float64(), false /* nullable_values */);
auto list_type = ::arrow::list(value_field);

std::vector<std::shared_ptr<Buffer>> child_buffers = {nullptr /* null bitmap */,
nullptr /* values */ };
auto child_data = ::arrow::ArrayData::Make(value_field->type(), 0,
std::move(child_buffers));
nullptr /* values */};
auto child_data =
::arrow::ArrayData::Make(value_field->type(), 0, std::move(child_buffers));

std::vector<std::shared_ptr<Buffer>> buffers = {nullptr /* bitmap */,
offsets_buffer };
std::vector<std::shared_ptr<Buffer>> buffers = {nullptr /* bitmap */, offsets_buffer};
auto array_data = ::arrow::ArrayData::Make(list_type, size, std::move(buffers));
array_data->child_data.push_back(child_data);

Expand Down
10 changes: 6 additions & 4 deletions src/parquet/arrow/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ using arrow::Int16Builder;
using arrow::ListArray;
using arrow::MemoryPool;
using arrow::NumericArray;
using arrow::ResizableBuffer;
using arrow::PrimitiveArray;
using arrow::ResizableBuffer;
using arrow::Status;
using arrow::Table;
using arrow::TimeUnit;
Expand Down Expand Up @@ -216,9 +216,11 @@ class LevelBuilder {
if (level_null_count && level_valid_bitmap == nullptr) {
// Special case: this is a null array (all elements are null)
RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 1)));
} else if (nullable_level && ((level_null_count == 0) ||
BitUtil::GetBit(level_valid_bitmap,
inner_offset + i + array_offsets_[recursion_level]))) {
} else if (nullable_level &&
((level_null_count == 0) ||
BitUtil::GetBit(
level_valid_bitmap,
inner_offset + i + array_offsets_[recursion_level]))) {
// Non-null element in a null level
RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 2)));
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/parquet/encoding-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#include "parquet/encoding-internal.h"
#include "parquet/util/memory.h"

using arrow::MemoryPool;
using arrow::default_memory_pool;
using arrow::MemoryPool;

namespace parquet {

Expand Down
2 changes: 1 addition & 1 deletion src/parquet/encoding-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#include "parquet/util/memory.h"
#include "parquet/util/test-common.h"

using arrow::MemoryPool;
using arrow::default_memory_pool;
using arrow::MemoryPool;

using std::string;
using std::vector;
Expand Down
8 changes: 4 additions & 4 deletions src/parquet/encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ class Encoder {
virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
int64_t valid_bits_offset) {
std::shared_ptr<ResizableBuffer> buffer;
auto status = ::arrow::AllocateResizableBuffer(pool_, num_values * sizeof(T),
&buffer);
auto status =
::arrow::AllocateResizableBuffer(pool_, num_values * sizeof(T), &buffer);
if (!status.ok()) {
std::ostringstream ss;
ss << "AllocateResizableBuffer failed in Encoder.PutSpaced in "
<< __FILE__ << ", on line " << __LINE__;
ss << "AllocateResizableBuffer failed in Encoder.PutSpaced in " << __FILE__
<< ", on line " << __LINE__;
throw ParquetException(ss.str());
}
int32_t num_valid_values = 0;
Expand Down
9 changes: 4 additions & 5 deletions src/parquet/printer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
std::string min = stats->EncodeMin(), max = stats->EncodeMax();
stream << ", Null Values: " << stats->null_count()
<< ", Distinct Values: " << stats->distinct_count() << std::endl
<< " Max: " << FormatStatValue(descr->physical_type(), max.c_str())
<< ", Min: " << FormatStatValue(descr->physical_type(), min.c_str());
<< " Max: " << FormatStatValue(descr->physical_type(), max)
<< ", Min: " << FormatStatValue(descr->physical_type(), min);
} else {
stream << " Statistics Not Set";
}
Expand Down Expand Up @@ -207,9 +207,8 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected
std::string min = stats->EncodeMin(), max = stats->EncodeMax();
stream << "\"NumNulls\": \"" << stats->null_count() << "\", "
<< "\"DistinctValues\": \"" << stats->distinct_count() << "\", "
<< "\"Max\": \"" << FormatStatValue(descr->physical_type(), max.c_str())
<< "\", "
<< "\"Min\": \"" << FormatStatValue(descr->physical_type(), min.c_str())
<< "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) << "\", "
<< "\"Min\": \"" << FormatStatValue(descr->physical_type(), min)
<< "\" },";
} else {
stream << "\"False\",";
Expand Down
7 changes: 4 additions & 3 deletions src/parquet/statistics-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
#include "parquet/types.h"
#include "parquet/util/memory.h"

using arrow::MemoryPool;
using arrow::default_memory_pool;
using arrow::MemoryPool;

namespace parquet {

Expand Down Expand Up @@ -194,8 +194,9 @@ bool* TestRowGroupStatistics<BooleanType>::GetValuesPointer(std::vector<bool>& v
}

template <typename TestType>
typename std::vector<typename TestType::c_type> TestRowGroupStatistics<
TestType>::GetDeepCopy(const std::vector<typename TestType::c_type>& values) {
typename std::vector<typename TestType::c_type>
TestRowGroupStatistics<TestType>::GetDeepCopy(
const std::vector<typename TestType::c_type>& values) {
return values;
}

Expand Down
2 changes: 1 addition & 1 deletion src/parquet/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#include "parquet/statistics.h"
#include "parquet/util/memory.h"

using arrow::MemoryPool;
using arrow::default_memory_pool;
using arrow::MemoryPool;

namespace parquet {

Expand Down
23 changes: 23 additions & 0 deletions src/parquet/types-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,54 +62,77 @@ TEST(TestLogicalTypeToString, LogicalTypes) {
}

// Checks FormatStatValue for each physical type, once through the
// std::string overload and once through the const char* overload.
TEST(TypePrinter, StatisticsTypes) {
// The const char* overload is deprecated (see the PARQUET-1357 note below) but
// is still exercised here, so the deprecation warning is silenced for this test.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  // Copies the raw bytes of an in-memory value into a std::string.
  auto raw_bytes = [](void* value, std::size_t length) {
    return std::string(reinterpret_cast<char*>(value), length);
  };

  {
    // INT32
    int32_t lo = 1024;
    int32_t hi = 2048;
    std::string lo_bytes = raw_bytes(&lo, sizeof(int32_t));
    std::string hi_bytes = raw_bytes(&hi, sizeof(int32_t));
    ASSERT_STREQ("1024", FormatStatValue(Type::INT32, lo_bytes).c_str());
    ASSERT_STREQ("1024", FormatStatValue(Type::INT32, lo_bytes.c_str()).c_str());
    ASSERT_STREQ("2048", FormatStatValue(Type::INT32, hi_bytes).c_str());
    ASSERT_STREQ("2048", FormatStatValue(Type::INT32, hi_bytes.c_str()).c_str());
  }

  {
    // INT64
    int64_t lo = 10240000000000;
    int64_t hi = 20480000000000;
    std::string lo_bytes = raw_bytes(&lo, sizeof(int64_t));
    std::string hi_bytes = raw_bytes(&hi, sizeof(int64_t));
    ASSERT_STREQ("10240000000000", FormatStatValue(Type::INT64, lo_bytes).c_str());
    ASSERT_STREQ("10240000000000",
                 FormatStatValue(Type::INT64, lo_bytes.c_str()).c_str());
    ASSERT_STREQ("20480000000000", FormatStatValue(Type::INT64, hi_bytes).c_str());
    ASSERT_STREQ("20480000000000",
                 FormatStatValue(Type::INT64, hi_bytes.c_str()).c_str());
  }

  {
    // FLOAT
    float lo = 1.024f;
    float hi = 2.048f;
    std::string lo_bytes = raw_bytes(&lo, sizeof(float));
    std::string hi_bytes = raw_bytes(&hi, sizeof(float));
    ASSERT_STREQ("1.024", FormatStatValue(Type::FLOAT, lo_bytes).c_str());
    ASSERT_STREQ("1.024", FormatStatValue(Type::FLOAT, lo_bytes.c_str()).c_str());
    ASSERT_STREQ("2.048", FormatStatValue(Type::FLOAT, hi_bytes).c_str());
    ASSERT_STREQ("2.048", FormatStatValue(Type::FLOAT, hi_bytes.c_str()).c_str());
  }

  {
    // DOUBLE
    double lo = 1.0245;
    double hi = 2.0489;
    std::string lo_bytes = raw_bytes(&lo, sizeof(double));
    std::string hi_bytes = raw_bytes(&hi, sizeof(double));
    ASSERT_STREQ("1.0245", FormatStatValue(Type::DOUBLE, lo_bytes).c_str());
    ASSERT_STREQ("1.0245", FormatStatValue(Type::DOUBLE, lo_bytes.c_str()).c_str());
    ASSERT_STREQ("2.0489", FormatStatValue(Type::DOUBLE, hi_bytes).c_str());
    ASSERT_STREQ("2.0489", FormatStatValue(Type::DOUBLE, hi_bytes.c_str()).c_str());
  }

  {
    // INT96
    Int96 lo = {{1024, 2048, 4096}};
    Int96 hi = {{2048, 4096, 8192}};
    std::string lo_bytes = raw_bytes(&lo, sizeof(Int96));
    std::string hi_bytes = raw_bytes(&hi, sizeof(Int96));
    ASSERT_STREQ("1024 2048 4096", FormatStatValue(Type::INT96, lo_bytes).c_str());
    ASSERT_STREQ("1024 2048 4096",
                 FormatStatValue(Type::INT96, lo_bytes.c_str()).c_str());
    ASSERT_STREQ("2048 4096 8192", FormatStatValue(Type::INT96, hi_bytes).c_str());
    ASSERT_STREQ("2048 4096 8192",
                 FormatStatValue(Type::INT96, hi_bytes.c_str()).c_str());
  }

  {
    // BYTE_ARRAY
    std::string lo_bytes("abcdef");
    std::string hi_bytes("ijklmnop");
    ASSERT_STREQ("abcdef", FormatStatValue(Type::BYTE_ARRAY, lo_bytes).c_str());
    ASSERT_STREQ("abcdef", FormatStatValue(Type::BYTE_ARRAY, lo_bytes.c_str()).c_str());
    ASSERT_STREQ("ijklmnop", FormatStatValue(Type::BYTE_ARRAY, hi_bytes).c_str());
    ASSERT_STREQ("ijklmnop",
                 FormatStatValue(Type::BYTE_ARRAY, hi_bytes.c_str()).c_str());

    // PARQUET-1357: FormatStatValue truncates binary statistics on zero character
    hi_bytes.push_back('\0');
    ASSERT_EQ(hi_bytes, FormatStatValue(Type::BYTE_ARRAY, hi_bytes));
    // The equivalent check through the const char* overload fails, which is why
    // FormatStatValue(.., const char*) was deprecated:
    // ASSERT_EQ(hi_bytes, FormatStatValue(Type::BYTE_ARRAY, hi_bytes.c_str()));
  }

  {
    // FIXED_LEN_BYTE_ARRAY
    std::string lo_bytes("abcdefgh");
    std::string hi_bytes("ijklmnop");
    ASSERT_STREQ("abcdefgh",
                 FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, lo_bytes).c_str());
    ASSERT_STREQ("abcdefgh",
                 FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, lo_bytes.c_str()).c_str());
    ASSERT_STREQ("ijklmnop",
                 FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, hi_bytes).c_str());
    ASSERT_STREQ("ijklmnop",
                 FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, hi_bytes.c_str()).c_str());
  }
#pragma GCC diagnostic pop
}

} // namespace parquet

0 comments on commit 8cf2568

Please sign in to comment.