Skip to content

Commit

Permalink
ARROW-7063: [C++][Python] Add metadata output and toggle in PrettyPri…
Browse files Browse the repository at this point in the history
…nt, add pyarrow.Schema.to_string, disable metadata output by default

ARROW-7080 introduced a temporary hack to suppress metadata output, so this change is a more complete rework that also adds metadata output to PrettyPrint behind a toggle. I disabled metadata output by default in Python (it is often more of a distraction than anything), since several others seemed to agree that was the way to go.

Closes #6472 from wesm/ARROW-7063 and squashes the following commits:

38e7a9c <Wes McKinney> Add show_metadata option to PrettyPrintOptions, add metadata output to PrettyPrint. Add Python Schema.to_string option, disable metadata output by default

Authored-by: Wes McKinney <wesm+git@apache.org>
Signed-off-by: François Saint-Jacques <fsaintjacques@gmail.com>
  • Loading branch information
wesm authored and fsaintjacques committed Feb 21, 2020
1 parent 2688a62 commit 3c2f65f
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 44 deletions.
67 changes: 45 additions & 22 deletions cpp/src/arrow/pretty_print.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/int_util.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/string.h"
#include "arrow/vendored/datetime.h"
#include "arrow/visitor_inline.h"
Expand All @@ -44,12 +45,11 @@ using internal::checked_cast;

class PrettyPrinter {
public:
PrettyPrinter(int indent, int indent_size, int window, bool skip_new_lines,
std::ostream* sink)
: indent_(indent),
indent_size_(indent_size),
window_(window),
skip_new_lines_(skip_new_lines),
PrettyPrinter(const PrettyPrintOptions& options, std::ostream* sink)
: indent_(options.indent),
indent_size_(options.indent_size),
window_(options.window),
skip_new_lines_(options.skip_new_lines),
sink_(sink) {}

void Write(const char* data);
Expand Down Expand Up @@ -117,10 +117,8 @@ void PrettyPrinter::Indent() {

class ArrayPrinter : public PrettyPrinter {
public:
ArrayPrinter(int indent, int indent_size, int window, const std::string& null_rep,
bool skip_new_lines, std::ostream* sink)
: PrettyPrinter(indent, indent_size, window, skip_new_lines, sink),
null_rep_(null_rep) {}
ArrayPrinter(const PrettyPrintOptions& options, std::ostream* sink)
: PrettyPrinter(options, sink), null_rep_(options.null_rep) {}

template <typename FormatFunction>
void WriteValues(const Array& array, FormatFunction&& func) {
Expand Down Expand Up @@ -441,14 +439,15 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
}

Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) {
ArrayPrinter printer(indent, 2, 10, "null", false, sink);
PrettyPrintOptions options;
options.indent = indent;
ArrayPrinter printer(options, sink);
return printer.Print(arr);
}

Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
std::ostream* sink) {
ArrayPrinter printer(options.indent, options.indent_size, options.window,
options.null_rep, options.skip_new_lines, sink);
ArrayPrinter printer(options, sink);
return printer.Print(arr);
}

Expand Down Expand Up @@ -485,8 +484,9 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
i = num_chunks - window - 1;
skip_comma = true;
} else {
ArrayPrinter printer(indent + options.indent_size, options.indent_size, window,
options.null_rep, options.skip_new_lines, sink);
PrettyPrintOptions chunk_options = options;
chunk_options.indent += options.indent_size;
ArrayPrinter printer(chunk_options, sink);
RETURN_NOT_OK(printer.Print(*chunked_arr.chunk(i)));
}
}
Expand Down Expand Up @@ -560,14 +560,26 @@ Status DebugPrint(const Array& arr, int indent) {

class SchemaPrinter : public PrettyPrinter {
public:
SchemaPrinter(const Schema& schema, int indent, int indent_size, int window,
bool skip_new_lines, std::ostream* sink)
: PrettyPrinter(indent, indent_size, window, skip_new_lines, sink),
schema_(schema) {}
SchemaPrinter(const Schema& schema, const PrettyPrintOptions& options,
std::ostream* sink)
: PrettyPrinter(options, sink),
schema_(schema),
show_metadata_(options.show_metadata) {}

Status PrintType(const DataType& type, bool nullable);
Status PrintField(const Field& field);

/// \brief Write a "-- metadata --" header followed by one "key: value"
/// line for every entry in `metadata`.
///
/// Nothing is written when `metadata` has no entries. Every emitted line,
/// including the header, is preceded by a call to Newline().
void PrintMetadata(const KeyValueMetadata& metadata) {
  const int64_t num_entries = metadata.size();
  if (num_entries <= 0) {
    return;
  }

  Newline();
  Write("-- metadata --");
  for (int64_t pos = 0; pos < num_entries; ++pos) {
    const std::string line = metadata.key(pos) + ": " + metadata.value(pos);
    Newline();
    Write(line);
  }
}

Status Print() {
for (int i = 0; i < schema_.num_fields(); ++i) {
if (i > 0) {
Expand All @@ -577,12 +589,17 @@ class SchemaPrinter : public PrettyPrinter {
}
RETURN_NOT_OK(PrintField(*schema_.field(i)));
}

if (show_metadata_ && schema_.metadata()) {
PrintMetadata(*schema_.metadata());
}
Flush();
return Status::OK();
}

private:
const Schema& schema_;
bool show_metadata_;
};

Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
Expand All @@ -607,13 +624,19 @@ Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
Status SchemaPrinter::PrintField(const Field& field) {
Write(field.name());
Write(": ");
return PrintType(*field.type(), field.nullable());
RETURN_NOT_OK(PrintType(*field.type(), field.nullable()));

if (show_metadata_ && field.metadata()) {
indent_ += indent_size_;
PrintMetadata(*field.metadata());
indent_ -= indent_size_;
}
return Status::OK();
}

Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
std::ostream* sink) {
SchemaPrinter printer(schema, options.indent, options.indent_size, options.window,
options.skip_new_lines, sink);
SchemaPrinter printer(schema, options, sink);
return printer.Print();
}

Expand Down
11 changes: 8 additions & 3 deletions cpp/src/arrow/pretty_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ class Status;
class Table;

struct PrettyPrintOptions {
PrettyPrintOptions(int indent_arg, int window_arg = 10, int indent_size_arg = 2,
std::string null_rep_arg = "null", bool skip_new_lines_arg = false)
PrettyPrintOptions(int indent_arg = 0, int window_arg = 10, int indent_size_arg = 2,
std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
bool show_metadata = false)
: indent(indent_arg),
indent_size(indent_size_arg),
window(window_arg),
null_rep(null_rep_arg),
skip_new_lines(skip_new_lines_arg) {}
skip_new_lines(skip_new_lines_arg),
show_metadata(show_metadata) {}

/// Number of spaces to shift entire formatted object to the right
int indent;
Expand All @@ -55,6 +57,9 @@ struct PrettyPrintOptions {

/// Skip new lines between elements, defaults to false
bool skip_new_lines;

/// Show Schema and Field-level KeyValueMetadata
bool show_metadata;
};

/// \brief Print human-readable representation of RecordBatch
Expand Down
31 changes: 26 additions & 5 deletions cpp/src/arrow/pretty_print_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "arrow/table.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"
#include "arrow/util/key_value_metadata.h"

namespace arrow {

Expand Down Expand Up @@ -623,8 +624,7 @@ four: struct<one: int32, two: dictionary<values=string, indices=int16, ordered=0
child 0, one: int32
child 1, two: dictionary<values=string, indices=int16, ordered=0>)expected";

PrettyPrintOptions options{0};

PrettyPrintOptions options;
Check(*sch, options, expected);
}

Expand All @@ -646,11 +646,32 @@ four: list<item: int32> not null
five: list<item: int32 not null>
child 0, item: int32 not null)expected";

PrettyPrintOptions options{0};

PrettyPrintOptions options;
Check(*sch, options, expected);
}

// Checks that pretty-printing a Schema with show_metadata enabled emits both
// the per-field and the schema-level key/value metadata.
TEST_F(TestPrettyPrint, SchemaWithMetadata) {
// ARROW-7063
// metadata1 and metadata2 are attached to the two fields; metadata3 is
// attached to the schema itself.
auto metadata1 = key_value_metadata({"foo"}, {"bar1"});
auto metadata2 = key_value_metadata({"foo"}, {"bar2"});
auto metadata3 = key_value_metadata({"foo"}, {"bar3"});
auto my_schema = schema(
{field("one", int32(), true, metadata1), field("two", utf8(), false, metadata2)},
metadata3);

// Expected layout: each field's metadata block directly follows that field,
// and the schema's own metadata block comes last.
static const char* expected = R"expected(one: int32
-- metadata --
foo: bar1
two: string not null
-- metadata --
foo: bar2
-- metadata --
foo: bar3)expected";
// Metadata output has to be switched on explicitly for this check.
PrettyPrintOptions options;
options.show_metadata = true;
Check(*my_schema, options, expected);
}

TEST_F(TestPrettyPrint, SchemaIndentation) {
// ARROW-6159
auto simple = field("one", int32());
Expand All @@ -660,7 +681,7 @@ TEST_F(TestPrettyPrint, SchemaIndentation) {
static const char* expected = R"expected( one: int32
two: int32 not null)expected";

PrettyPrintOptions options{/*indent=*/4};
PrettyPrintOptions options(/*indent=*/4);
Check(*sch, options, expected);
}

Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,13 @@ bool Field::IsCompatibleWith(const std::shared_ptr<Field>& other) const {
return IsCompatibleWith(*other);
}

std::string Field::ToString(bool print_metadata) const {
std::string Field::ToString(bool show_metadata) const {
std::stringstream ss;
ss << name_ << ": " << type_->ToString();
if (!nullable_) {
ss << " not null";
}
if (print_metadata && metadata_) {
if (show_metadata && metadata_) {
ss << metadata_->ToString();
}
return ss.str();
Expand Down Expand Up @@ -798,7 +798,7 @@ Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
return Status::OK();
}

std::string Schema::ToString(bool print_metadata) const {
std::string Schema::ToString(bool show_metadata) const {
std::stringstream buffer;

int i = 0;
Expand All @@ -810,7 +810,7 @@ std::string Schema::ToString(bool print_metadata) const {
++i;
}

if (print_metadata && HasMetadata()) {
if (show_metadata && HasMetadata()) {
buffer << impl_->metadata_->ToString();
}

Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ class ARROW_EXPORT Field : public detail::Fingerprintable {
bool IsCompatibleWith(const std::shared_ptr<Field>& other) const;

/// \brief Return a string representation of the field
std::string ToString(bool print_metadata = false) const;
/// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
/// print keys and values in the output
std::string ToString(bool show_metadata = false) const;

/// \brief Return the field name
const std::string& name() const { return name_; }
Expand Down Expand Up @@ -1452,7 +1454,9 @@ class ARROW_EXPORT Schema : public detail::Fingerprintable,
std::shared_ptr<const KeyValueMetadata> metadata() const;

/// \brief Render a string representation of the schema suitable for debugging
std::string ToString(bool print_metadata = false) const;
/// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
/// print keys and values in the output
std::string ToString(bool show_metadata = false) const;

Status AddField(int i, const std::shared_ptr<Field>& field,
std::shared_ptr<Schema>* out) const;
Expand Down
5 changes: 5 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -401,10 +401,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
shared_ptr[CSchema] RemoveMetadata()

# Cython declaration of the C++ PrettyPrintOptions struct used by the
# PrettyPrint functions declared below.
cdef cppclass PrettyPrintOptions:
# Constructor overloads exposed to Cython (no arguments, indent only,
# indent and window).
PrettyPrintOptions()
PrettyPrintOptions(int indent_arg)
PrettyPrintOptions(int indent_arg, int window_arg)
# Publicly assignable option fields.
int indent
int indent_size
int window
c_string null_rep
c_bool skip_new_lines
# Toggles output of key/value metadata when printing a schema.
c_bool show_metadata

CStatus PrettyPrint(const CArray& schema,
const PrettyPrintOptions& options,
Expand Down
14 changes: 14 additions & 0 deletions python/pyarrow/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,20 @@ def test_schema():
pa.schema([None])


def test_schema_to_string_with_metadata():
# ARROW-7063
# Build a schema carrying metadata at both levels: "key1" on the field and
# "key2" on the schema itself.
my_schema = pa.schema([pa.field("foo", "int32", False,
metadata={"key1": "value1"})],
metadata={"key2": "value2"})

# With show_metadata=True the field's metadata is printed right after the
# field, followed by the schema-level metadata at the end.
assert my_schema.to_string(show_metadata=True) == """\
foo: int32 not null
-- metadata --
key1: value1
-- metadata --
key2: value2"""


def test_schema_from_tuples():
fields = [
('foo', pa.int32()),
Expand Down
29 changes: 21 additions & 8 deletions python/pyarrow/types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1304,26 +1304,39 @@ cdef class Schema:
new_schema = self.schema.RemoveMetadata()
return pyarrow_wrap_schema(new_schema)

def __str__(self):
def to_string(self, bint show_metadata=False):
"""
Return human-readable representation of Schema
Parameters
----------
show_metadata : boolean, default False
If True, and there is non-empty metadata, it will be printed after
the column names and types
Returns
-------
str : the formatted output
"""
cdef:
c_string result
PrettyPrintOptions options

with nogil:
options.indent = 0
options.show_metadata = show_metadata
check_status(
PrettyPrint(
deref(self.schema),
PrettyPrintOptions(0),
options,
&result
)
)

printed = frombytes(result)
if self.metadata is not None:
import pprint
metadata_formatted = pprint.pformat(self.metadata)
printed += '\nmetadata\n--------\n' + metadata_formatted
return frombytes(result)

return printed
def __str__(self):
# str() leaves metadata out; call to_string(show_metadata=True) to include it.
return self.to_string(show_metadata=False)

def __repr__(self):
return self.__str__()
Expand Down

0 comments on commit 3c2f65f

Please sign in to comment.