From 20cc4aa7ee823b7c3dbddfb0823b6a48ca5f0f14 Mon Sep 17 00:00:00 2001 From: Kamil Konior <57423201+Cheappie@users.noreply.github.com> Date: Mon, 2 May 2022 19:07:31 +0200 Subject: [PATCH] expose row-group flush in public api (#1634) * expose row-group flush in public api * a try to improve method names * tiny refactors, beautifying code --- parquet/src/arrow/arrow_writer.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs index 3a53c2cf330..7ddd6443230 100644 --- a/parquet/src/arrow/arrow_writer.rs +++ b/parquet/src/arrow/arrow_writer.rs @@ -113,7 +113,6 @@ impl ArrowWriter { } self.buffered_rows += batch.num_rows(); - self.flush_completed()?; Ok(()) @@ -122,13 +121,18 @@ impl ArrowWriter { /// Flushes buffered data until there are less than `max_row_group_size` rows buffered fn flush_completed(&mut self) -> Result<()> { while self.buffered_rows >= self.max_row_group_size { - self.flush_row_group(self.max_row_group_size)?; + self.flush_rows(self.max_row_group_size)?; } Ok(()) } + /// Flushes all buffered rows into a new row group + pub fn flush(&mut self) -> Result<()> { + self.flush_rows(self.buffered_rows) + } + /// Flushes `num_rows` from the buffer into a new row group - fn flush_row_group(&mut self, num_rows: usize) -> Result<()> { + fn flush_rows(&mut self, num_rows: usize) -> Result<()> { if num_rows == 0 { return Ok(()); } @@ -192,8 +196,7 @@ impl ArrowWriter { /// Close and finalize the underlying Parquet writer pub fn close(&mut self) -> Result { - self.flush_completed()?; - self.flush_row_group(self.buffered_rows)?; + self.flush()?; self.writer.close() } }