diff --git a/NOTICE.txt b/NOTICE.txt index 0310c897cd743..a85101617cec8 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -7,3 +7,8 @@ The Apache Software Foundation (http://www.apache.org/). This product includes software from the SFrame project (BSD, 3-clause). * Copyright (C) 2015 Dato, Inc. * Copyright (c) 2009 Carnegie Mellon University. + +This product includes software from the Numpy project (BSD-new) + https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 + * Copyright (c) 1995, 1996, 1997 Jim Hugunin, hugunin@mit.edu + * Copyright (c) 2005 Travis E. Oliphant oliphant@ee.byu.edu Brigham Young University diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index e6b47de70ed70..1d3edf0117f91 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -243,6 +243,17 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, std::shared_ptr* // Implement MessageBuilder +// Returns the endianness of the system we are running on, +// based on the NUMPY_API function. See NOTICE.txt +flatbuf::Endianness endianness() { + union { + uint32_t i; + char c[4]; + } bint = {0x01020304}; + + return bint.c[0] == 1 ? 
flatbuf::Endianness_Big : flatbuf::Endianness_Little; +} + Status MessageBuilder::SetSchema(const Schema* schema) { header_type_ = flatbuf::MessageHeader_Schema; @@ -254,7 +265,11 @@ Status MessageBuilder::SetSchema(const Schema* schema) { field_offsets.push_back(offset); } - header_ = flatbuf::CreateSchema(fbb_, fbb_.CreateVector(field_offsets)).Union(); + header_ = flatbuf::CreateSchema( + fbb_, + endianness(), + fbb_.CreateVector(field_offsets)) + .Union(); body_length_ = 0; return Status::OK(); } @@ -263,7 +278,8 @@ Status MessageBuilder::SetRecordBatch(int32_t length, int64_t body_length, const std::vector& nodes, const std::vector& buffers) { header_type_ = flatbuf::MessageHeader_RecordBatch; - header_ = flatbuf::CreateRecordBatch(fbb_, length, fbb_.CreateVectorOfStructs(nodes), + header_ = flatbuf::CreateRecordBatch(fbb_, length, + fbb_.CreateVectorOfStructs(nodes), fbb_.CreateVectorOfStructs(buffers)) .Union(); body_length_ = body_length; diff --git a/format/Arrow.graffle b/format/Arrow.graffle new file mode 100644 index 0000000000000..453e85025d8d3 Binary files /dev/null and b/format/Arrow.graffle differ diff --git a/format/Arrow.png b/format/Arrow.png new file mode 100644 index 0000000000000..361dc82ed5a45 Binary files /dev/null and b/format/Arrow.png differ diff --git a/format/Layout.md b/format/Layout.md index 815c47f2c934b..5eaefeebf210a 100644 --- a/format/Layout.md +++ b/format/Layout.md @@ -78,7 +78,14 @@ Base requirements ## Byte Order ([Endianness][3]) -The Arrow format is little endian. +The Arrow format is little endian by default. +The Schema metadata has an endianness field indicating the endianness of RecordBatches. +Typically this is the endianness of the system where the RecordBatch was generated. +The main use case is exchanging RecordBatches between systems with the same endianness. +Initially, an error is returned when trying to read a Schema with an endianness +that does not match the underlying system. 
The reference implementation focuses on +Little Endian and provides tests for it. Eventually we may provide automatic conversion +via byte swapping. ## Alignment and Padding diff --git a/format/Message.fbs b/format/Message.fbs index 6a351b9dbf0a6..3f688c156e3ea 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -87,10 +87,21 @@ table Field { children: [Field]; } +/// ---------------------------------------------------------------------- +/// Endianness of the platform that produces the RecordBatch + +enum Endianness:int { Little, Big } + /// ---------------------------------------------------------------------- /// A Schema describes the columns in a row batch table Schema { + + /// Endianness of the buffer. + /// It is Little Endian by default. + /// If the endianness doesn't match the underlying system, the vectors need to be converted. + endianness: Endianness=Little; + fields: [Field]; }