diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java index e55b276b29..da9db00c7b 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java @@ -48,6 +48,11 @@ private static enum MODE { private int currentCount; private int currentValue; private int[] currentBuffer; + private int currentBufferLength; + + // Reusable buffers to avoid per-run allocation in PACKED mode + private int[] packedValuesBuffer = new int[0]; + private byte[] packedBytesBuffer = new byte[0]; public RunLengthBitPackingHybridDecoder(int bitWidth, InputStream in) { LOG.debug("decoding bitWidth {}", bitWidth); @@ -69,7 +74,7 @@ public int readInt() throws IOException { result = currentValue; break; case PACKED: - result = currentBuffer[currentBuffer.length - 1 - currentCount]; + result = currentBuffer[currentBufferLength - 1 - currentCount]; break; default: throw new ParquetDecodingException("not a valid mode " + mode); @@ -90,17 +95,24 @@ private void readNext() throws IOException { case PACKED: int numGroups = header >>> 1; currentCount = numGroups * 8; + currentBufferLength = currentCount; LOG.debug("reading {} values BIT PACKED", currentCount); - currentBuffer = new int[currentCount]; // TODO: reuse a buffer - byte[] bytes = new byte[numGroups * bitWidth]; + if (packedValuesBuffer.length < currentCount) { + packedValuesBuffer = new int[currentCount]; + } + currentBuffer = packedValuesBuffer; + int bytesRequired = numGroups * bitWidth; + if (packedBytesBuffer.length < bytesRequired) { + packedBytesBuffer = new byte[bytesRequired]; + } // At the end of the file RLE data though, there might not be that many bytes left. int bytesToRead = (int) Math.ceil(currentCount * bitWidth / 8.0); bytesToRead = Math.min(bytesToRead, in.available()); - new DataInputStream(in).readFully(bytes, 0, bytesToRead); + new DataInputStream(in).readFully(packedBytesBuffer, 0, bytesToRead); for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) { - packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex); + packer.unpack8Values(packedBytesBuffer, byteIndex, currentBuffer, valueIndex); } break; default: