Skip to content
This repository has been archived by the owner on May 10, 2024. It is now read-only.

Commit

Permalink
PARQUET-1164: [C++] Account for API changes in ARROW-1808
Browse files: browse the repository at this point in the history.
Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #418 from wesm/PARQUET-1164 and squashes the following commits:

ca18e60 [Wes McKinney] Bump Arrow version to include ARROW-1808
d580b4f [Wes McKinney] Refactor to account for ARROW-1808
  • Loading branch information
wesm committed Nov 22, 2017
1 parent 6a2ed4f commit 9b39fbd
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 50 deletions.
2 changes: 1 addition & 1 deletion cmake_modules/ThirdpartyToolchain.cmake
Expand Up @@ -366,7 +366,7 @@ if (NOT ARROW_FOUND)
-DARROW_BUILD_TESTS=OFF)

if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "")
set(ARROW_VERSION "f2806fa518583907a129b2ecb0b7ec8758b69e17")
set(ARROW_VERSION "fc4e2c36d2c56a8bd5d1ab17eeb406826924d3e5")
else()
set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}")
endif()
Expand Down
9 changes: 3 additions & 6 deletions src/parquet/arrow/arrow-reader-writer-benchmark.cc
Expand Up @@ -112,11 +112,9 @@ std::shared_ptr<::arrow::Table> TableFromVector(
EXIT_NOT_OK(builder.Finish(&array));

auto field = ::arrow::field("column", type, nullable);
auto schema = std::make_shared<::arrow::Schema>(
std::vector<std::shared_ptr<::arrow::Field>>({field}));
auto schema = ::arrow::schema({field});
auto column = std::make_shared<::arrow::Column>(field, array);
return std::make_shared<::arrow::Table>(
schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
return ::arrow::Table::Make(schema, {column});
}

template <>
Expand All @@ -139,8 +137,7 @@ std::shared_ptr<::arrow::Table> TableFromVector<BooleanType>(const std::vector<b
auto schema = std::make_shared<::arrow::Schema>(
std::vector<std::shared_ptr<::arrow::Field>>({field}));
auto column = std::make_shared<::arrow::Column>(field, array);
return std::make_shared<::arrow::Table>(
schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
return ::arrow::Table::Make(schema, {column});
}

template <bool nullable, typename ParquetType>
Expand Down
55 changes: 26 additions & 29 deletions src/parquet/arrow/arrow-reader-writer-test.cc
Expand Up @@ -23,8 +23,8 @@

#include "gtest/gtest.h"

#include <sstream>
#include <arrow/compute/api.h>
#include <sstream>

#include "parquet/api/reader.h"
#include "parquet/api/writer.h"
Expand Down Expand Up @@ -1145,7 +1145,7 @@ void MakeDateTimeTypesTable(std::shared_ptr<Table>* out, bool nanos_as_micros =
std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1),
std::make_shared<Column>("f2", a2), std::make_shared<Column>("f3", a3),
std::make_shared<Column>("f4", a4), std::make_shared<Column>("f5", a5)};
*out = std::make_shared<::arrow::Table>(schema, columns);
*out = Table::Make(schema, columns);
}

TEST(TestArrowReadWrite, DateTimeTypes) {
Expand Down Expand Up @@ -1199,31 +1199,28 @@ TEST(TestArrowReadWrite, CoerceTimestamps) {
auto s1 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us),
field("f_ns", t_ns)}));
auto input = std::make_shared<::arrow::Table>(
s1, ColumnVector({std::make_shared<Column>("f_s", a_s),
std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_us),
std::make_shared<Column>("f_ns", a_ns)}));
auto input = Table::Make(
s1,
{std::make_shared<Column>("f_s", a_s), std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_ns)});

// Result when coercing to milliseconds
auto s2 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_ms), field("f_ms", t_ms), field("f_us", t_ms),
field("f_ns", t_ms)}));
auto ex_milli_result = std::make_shared<::arrow::Table>(
s2, ColumnVector({std::make_shared<Column>("f_s", a_ms),
std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_ms),
std::make_shared<Column>("f_ns", a_ms)}));
auto ex_milli_result = Table::Make(
s2,
{std::make_shared<Column>("f_s", a_ms), std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_ms), std::make_shared<Column>("f_ns", a_ms)});

// Result when coercing to microseconds
auto s3 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_us), field("f_ms", t_us), field("f_us", t_us),
field("f_ns", t_us)}));
auto ex_micro_result = std::make_shared<::arrow::Table>(
s3, ColumnVector({std::make_shared<Column>("f_s", a_us),
std::make_shared<Column>("f_ms", a_us),
std::make_shared<Column>("f_us", a_us),
std::make_shared<Column>("f_ns", a_us)}));
auto ex_micro_result = Table::Make(
s3,
{std::make_shared<Column>("f_s", a_us), std::make_shared<Column>("f_ms", a_us),
std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_us)});

std::shared_ptr<Table> milli_result;
DoSimpleRoundtrip(
Expand Down Expand Up @@ -1276,10 +1273,10 @@ TEST(TestArrowReadWrite, CoerceTimestampsLosePrecision) {
auto c3 = std::make_shared<Column>("f_us", a_us);
auto c4 = std::make_shared<Column>("f_ns", a_ns);

auto t1 = std::make_shared<::arrow::Table>(s1, ColumnVector({c1}));
auto t2 = std::make_shared<::arrow::Table>(s2, ColumnVector({c2}));
auto t3 = std::make_shared<::arrow::Table>(s3, ColumnVector({c3}));
auto t4 = std::make_shared<::arrow::Table>(s4, ColumnVector({c4}));
auto t1 = Table::Make(s1, {c1});
auto t2 = Table::Make(s2, {c2});
auto t3 = Table::Make(s3, {c3});
auto t4 = Table::Make(s4, {c4});

auto sink = std::make_shared<InMemoryOutputStream>();

Expand Down Expand Up @@ -1327,7 +1324,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {

std::vector<std::shared_ptr<::arrow::Column>> columns = {
std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1)};
auto table = std::make_shared<::arrow::Table>(schema, columns);
auto table = Table::Make(schema, columns);

// Expected schema and values
auto e0 = field("f0", ::arrow::date32());
Expand All @@ -1341,7 +1338,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {

std::vector<std::shared_ptr<::arrow::Column>> ex_columns = {
std::make_shared<Column>("f0", x0), std::make_shared<Column>("f1", x1)};
auto ex_table = std::make_shared<::arrow::Table>(ex_schema, ex_columns);
auto ex_table = Table::Make(ex_schema, ex_columns);

std::shared_ptr<Table> result;
DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result);
Expand Down Expand Up @@ -1372,7 +1369,7 @@ void MakeDoubleTable(int num_columns, int num_rows, int nchunks,
fields[i] = column->field();
}
auto schema = std::make_shared<::arrow::Schema>(fields);
*out = std::make_shared<Table>(schema, columns);
*out = Table::Make(schema, columns);
}

TEST(TestArrowReadWrite, MultithreadedRead) {
Expand Down Expand Up @@ -1459,9 +1456,9 @@ TEST(TestArrowReadWrite, ReadColumnSubset) {
ex_fields.push_back(table->column(i)->field());
}

auto ex_schema = std::make_shared<::arrow::Schema>(ex_fields);
Table expected(ex_schema, ex_columns);
AssertTablesEqual(expected, *result);
auto ex_schema = ::arrow::schema(ex_fields);
auto expected = Table::Make(ex_schema, ex_columns);
AssertTablesEqual(*expected, *result);
}

void MakeListTable(int num_rows, std::shared_ptr<Table>* out) {
Expand Down Expand Up @@ -1501,7 +1498,7 @@ void MakeListTable(int num_rows, std::shared_ptr<Table>* out) {
auto f1 = ::arrow::field("a", ::arrow::list(::arrow::int8()));
auto schema = ::arrow::schema({f1});
std::vector<std::shared_ptr<Array>> arrays = {list_array};
*out = std::make_shared<Table>(schema, arrays);
*out = Table::Make(schema, arrays);
}

TEST(TestArrowReadWrite, ListLargeRecords) {
Expand Down Expand Up @@ -1544,7 +1541,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) {
auto chunked_col =
std::make_shared<::arrow::Column>(table->schema()->field(0), chunked);
std::vector<std::shared_ptr<::arrow::Column>> columns = {chunked_col};
auto chunked_table = std::make_shared<Table>(table->schema(), columns);
auto chunked_table = Table::Make(table->schema(), columns);

ASSERT_TRUE(table->Equals(*chunked_table));
}
Expand Down
4 changes: 2 additions & 2 deletions src/parquet/arrow/arrow-schema-test.cc
Expand Up @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test {
for (int i = 0; i < expected_schema->num_fields(); ++i) {
auto lhs = result_schema_->field(i);
auto rhs = expected_schema->field(i);
EXPECT_TRUE(lhs->Equals(rhs))
<< i << " " << lhs->ToString() << " != " << rhs->ToString();
EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString()
<< " != " << rhs->ToString();
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/parquet/arrow/reader.cc
Expand Up @@ -431,7 +431,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index,
RETURN_NOT_OK(ParallelFor(nthreads, num_columns, ReadColumnFunc));
}

*out = std::make_shared<Table>(schema, columns);
*out = Table::Make(schema, columns);
return Status::OK();
}

Expand Down Expand Up @@ -466,7 +466,7 @@ Status FileReader::Impl::ReadTable(const std::vector<int>& indices,
RETURN_NOT_OK(ParallelFor(nthreads, num_fields, ReadColumnFunc));
}

*table = std::make_shared<Table>(schema, columns);
*table = Table::Make(schema, columns);
return Status::OK();
}

Expand Down
2 changes: 1 addition & 1 deletion src/parquet/arrow/test-util.h
Expand Up @@ -414,7 +414,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr<Array>& va
std::vector<std::shared_ptr<::arrow::Column>> columns({column});
std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
auto schema = std::make_shared<::arrow::Schema>(fields);
return std::make_shared<::arrow::Table>(schema, columns);
return ::arrow::Table::Make(schema, columns);
}

template <typename T>
Expand Down
18 changes: 9 additions & 9 deletions src/parquet/file/reader.cc
Expand Up @@ -45,9 +45,9 @@ RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
: contents_(std::move(contents)) {}

std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
DCHECK(i < metadata()->num_columns())
<< "The RowGroup only has " << metadata()->num_columns()
<< "columns, requested column: " << i;
DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
<< metadata()->num_columns()
<< "columns, requested column: " << i;
const ColumnDescriptor* descr = metadata()->schema()->Column(i);

std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
Expand All @@ -57,9 +57,9 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
}

std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
DCHECK(i < metadata()->num_columns())
<< "The RowGroup only has " << metadata()->num_columns()
<< "columns, requested column: " << i;
DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
<< metadata()->num_columns()
<< "columns, requested column: " << i;
return contents_->GetColumnPageReader(i);
}

Expand Down Expand Up @@ -127,9 +127,9 @@ std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
}

std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
DCHECK(i < metadata()->num_row_groups())
<< "The file only has " << metadata()->num_row_groups()
<< "row groups, requested reader for: " << i;
DCHECK(i < metadata()->num_row_groups()) << "The file only has "
<< metadata()->num_row_groups()
<< "row groups, requested reader for: " << i;
return contents_->GetRowGroup(i);
}

Expand Down

0 comments on commit 9b39fbd

Please sign in to comment.