Skip to content

Commit

Permalink
GH-36319: [Go][Parquet] Improved row group writer error messages (#36320)
Browse files Browse the repository at this point in the history

### Rationale for this change

### What changes are included in this PR?

Updated the error messages for mismatched column row counts to help identify the column or row group index that failed the check, and to report how many rows were expected versus found.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.

* Closes: #36319

Authored-by: Mark Wolfe <mark@wolfe.id.au>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
  • Loading branch information
wolfeidau committed Jul 5, 2023
1 parent 6521489 commit ff6717b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
8 changes: 6 additions & 2 deletions go/parquet/file/file_writer_test.go
Expand Up @@ -139,7 +139,9 @@ func (t *SerializeTestSuite) unequalNumRows(maxRows int64, rowsPerCol []int64) {
t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, t.DefLevels[:rowsPerCol[col]], nil)
cw.Close()
}
t.Error(rgw.Close())
err := rgw.Close()
t.Error(err)
t.ErrorContains(err, "row mismatch for unbuffered row group")
}

func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows int64, rowsPerCol []int64) {
Expand All @@ -154,7 +156,9 @@ func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows int64, rowsPerCol []
t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, t.DefLevels[:rowsPerCol[col]], nil)
cw.Close()
}
t.Error(rgw.Close())
err := rgw.Close()
t.Error(err)
t.ErrorContains(err, "row mismatch for buffered row group")
}

func (t *SerializeTestSuite) TestZeroRows() {
Expand Down
8 changes: 4 additions & 4 deletions go/parquet/file/row_group_writer.go
Expand Up @@ -110,13 +110,13 @@ func (rg *rowGroupWriter) checkRowsWritten() error {
if rg.nrows == 0 {
rg.nrows = current
} else if rg.nrows != current {
return xerrors.New("row mismatch")
return xerrors.Errorf("row mismatch for unbuffered row group: %d, count expected: %d, actual: %d", rg.ordinal, current, rg.nrows)
}
} else if rg.buffered {
current := rg.columnWriters[0].RowsWritten()
for _, wr := range rg.columnWriters[1:] {
for i, wr := range rg.columnWriters[1:] {
if current != wr.RowsWritten() {
return xerrors.New("row mismatch error")
return xerrors.Errorf("row mismatch for buffered row group: %d, column: %d, count expected: %d, actual: %d", rg.ordinal, i+1, current, wr.RowsWritten())
}
}
rg.nrows = current
Expand Down Expand Up @@ -182,7 +182,7 @@ func (rg *rowGroupWriter) Column(i int) (ColumnChunkWriter, error) {
if i >= 0 && i < len(rg.columnWriters) {
return rg.columnWriters[i], nil
}
return nil, xerrors.New("invalid column number requested")
return nil, xerrors.Errorf("invalid column number requested: %d", i)
}

func (rg *rowGroupWriter) CurrentColumn() int { return rg.metadata.CurrentColumn() }
Expand Down

0 comments on commit ff6717b

Please sign in to comment.