Skip to content

Commit

Permalink
ARROW-3894: [C++] Ensure that IPC file is properly initialized even if no record batches are written
Browse files Browse the repository at this point in the history

Without invoking `Start()`, the file cannot be read

Author: Wes McKinney <wesm+git@apache.org>

Closes #3095 from wesm/ARROW-3894 and squashes the following commits:

5fc6528 <Wes McKinney> Also compare schemas
1074a69 <Wes McKinney> Ensure that IPC file is properly initialized even if no record batches are written
  • Loading branch information
wesm committed Dec 5, 2018
1 parent 5b85981 commit 8152916
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
4 changes: 4 additions & 0 deletions cpp/src/arrow/ipc/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,10 @@ class RecordBatchFileWriter::RecordBatchFileWriterImpl
}

Status Close() override {
// Write the schema if not already written
// User is responsible for closing the OutputStream
RETURN_NOT_OK(CheckStarted());

// Write metadata
RETURN_NOT_OK(UpdatePosition());

Expand Down
16 changes: 16 additions & 0 deletions python/pyarrow/tests/test_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,3 +639,19 @@ def read_file(source):
reader = pa.open_file(source)
return [reader.get_batch(i)
for i in range(reader.num_record_batches)]


def test_write_empty_ipc_file():
    """Regression test for ARROW-3894.

    Closing a ``RecordBatchFileWriter`` without writing any record batch
    must still yield an IPC file that can be opened and read back as an
    empty table carrying the original schema.
    """
    expected_schema = pa.schema([('field', pa.int64())])

    # Open a writer on an in-memory stream and close it right away,
    # without writing a single batch.
    out_stream = pa.BufferOutputStream()
    file_writer = pa.RecordBatchFileWriter(out_stream, expected_schema)
    file_writer.close()

    # Read the produced bytes back through the file reader.
    source = pa.BufferReader(out_stream.getvalue())
    file_reader = pa.RecordBatchFileReader(source)
    result = file_reader.read_all()

    assert len(result) == 0
    assert result.schema.equals(expected_schema)

0 comments on commit 8152916

Please sign in to comment.