From f51fe346e39c377b1e75b245293a362129e9d259 Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Wed, 27 Dec 2017 09:13:50 -0800 Subject: [PATCH] ORC-285. Empty vector batches of floats or doubles get java.io.EOFException Fixes #205 Signed-off-by: Owen O'Malley --- .../apache/orc/impl/TreeReaderFactory.java | 125 +++++++++--------- .../org/apache/orc/TestVectorOrcFile.java | 35 +++++ 2 files changed, 99 insertions(+), 61 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java index 3e84a875a3..1891737a43 100644 --- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java @@ -641,40 +641,42 @@ public void nextVector(ColumnVector previousVector, final boolean hasNulls = !result.noNulls; boolean allNulls = hasNulls; - if (hasNulls) { - // conditions to ensure bounds checks skips - for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) { - allNulls = allNulls & result.isNull[i]; - } - if (allNulls) { - result.vector[0] = Double.NaN; - result.isRepeating = true; - } else { - // some nulls - result.isRepeating = false; + if (batchSize > 0) { + if (hasNulls) { // conditions to ensure bounds checks skips - for (int i = 0; batchSize <= result.isNull.length - && batchSize <= result.vector.length && i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readFloat(stream); - } else { - // If the value is not present then set NaN - result.vector[i] = Double.NaN; + for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) { + allNulls = allNulls & result.isNull[i]; + } + if (allNulls) { + result.vector[0] = Double.NaN; + result.isRepeating = true; + } else { + // some nulls + result.isRepeating = false; + // conditions to ensure bounds checks skips + for (int i = 0; batchSize <= result.isNull.length + && batchSize <= result.vector.length && i < batchSize; 
i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readFloat(stream); + } else { + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } } } + } else { + // no nulls & > 1 row (check repeating) + boolean repeating = (batchSize > 1); + final float f1 = utils.readFloat(stream); + result.vector[0] = f1; + // conditions to ensure bounds checks skips + for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { + final float f2 = utils.readFloat(stream); + repeating = repeating && (f1 == f2); + result.vector[i] = f2; + } + result.isRepeating = repeating; } - } else { - // no nulls & > 1 row (check repeating) - boolean repeating = (batchSize > 1); - final float f1 = utils.readFloat(stream); - result.vector[0] = f1; - // conditions to ensure bounds checks skips - for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { - final float f2 = utils.readFloat(stream); - repeating = repeating && (f1 == f2); - result.vector[i] = f2; - } - result.isRepeating = repeating; } } @@ -734,41 +736,42 @@ public void nextVector(ColumnVector previousVector, final boolean hasNulls = !result.noNulls; boolean allNulls = hasNulls; - - if (hasNulls) { - // conditions to ensure bounds checks skips - for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) { - allNulls = allNulls & result.isNull[i]; - } - if (allNulls) { - result.vector[0] = Double.NaN; - result.isRepeating = true; - } else { - // some nulls - result.isRepeating = false; + if (batchSize != 0) { + if (hasNulls) { // conditions to ensure bounds checks skips - for (int i = 0; batchSize <= result.isNull.length - && batchSize <= result.vector.length && i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readDouble(stream); - } else { - // If the value is not present then set NaN - result.vector[i] = Double.NaN; + for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) { + allNulls = allNulls & result.isNull[i]; 
+ } + if (allNulls) { + result.vector[0] = Double.NaN; + result.isRepeating = true; + } else { + // some nulls + result.isRepeating = false; + // conditions to ensure bounds checks skips + for (int i = 0; batchSize <= result.isNull.length + && batchSize <= result.vector.length && i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readDouble(stream); + } else { + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } } } + } else { + // no nulls + boolean repeating = (batchSize > 1); + final double d1 = utils.readDouble(stream); + result.vector[0] = d1; + // conditions to ensure bounds checks skips + for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { + final double d2 = utils.readDouble(stream); + repeating = repeating && (d1 == d2); + result.vector[i] = d2; + } + result.isRepeating = repeating; } - } else { - // no nulls - boolean repeating = (batchSize > 1); - final double d1 = utils.readDouble(stream); - result.vector[0] = d1; - // conditions to ensure bounds checks skips - for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { - final double d2 = utils.readDouble(stream); - repeating = repeating && (d1 == d2); - result.vector[i] = d2; - } - result.isRepeating = repeating; } } diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index 62e3c05460..3992d4ee96 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -3289,4 +3289,39 @@ public void testFutureOrcFile() throws Exception { assertEquals(OrcProto.CompressionKind.NONE, ps.getCompression()); } } + + @Test + public void testEmptyDoubleStream() throws Exception { + TypeDescription schema = + TypeDescription.fromString("struct<list1:array<double>," + + "list2:array<float>>"); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).setSchema(schema)); + 
VectorizedRowBatch batch = schema.createRowBatch(); + batch.size = 2; + ListColumnVector list1 = (ListColumnVector) batch.cols[0]; + ListColumnVector list2 = (ListColumnVector) batch.cols[1]; + for(int r=0; r < batch.size; ++r) { + list1.offsets[r] = 0; + list1.lengths[r] = 0; + list2.offsets[r] = 0; + list2.lengths[r] = 0; + } + writer.addRowBatch(batch); + writer.close(); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf)); + RecordReader rows = reader.rows(); + batch = reader.getSchema().createRowBatch(); + assertTrue(rows.nextBatch(batch)); + assertEquals(2, batch.size); + list1 = (ListColumnVector) batch.cols[0]; + list2 = (ListColumnVector) batch.cols[1]; + for(int r=0; r < batch.size; ++r) { + assertEquals(0, list1.lengths[r]); + assertEquals(0, list2.lengths[r]); + } + assertFalse(rows.nextBatch(batch)); + rows.close(); + } }