Use 8byte offsets in chunk based raw index creator #5285

Merged 5 commits on Apr 23, 2020
Changes from 2 commits
@@ -24,6 +24,7 @@
import org.apache.pinot.core.io.compression.ChunkDecompressor;
import org.apache.pinot.core.io.reader.BaseSingleColumnSingleValueReader;
import org.apache.pinot.core.io.reader.impl.ChunkReaderContext;
import org.apache.pinot.core.io.writer.impl.v1.VarByteChunkSingleValueWriter;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,6 +48,7 @@ public abstract class BaseChunkSingleValueReader extends BaseSingleColumnSingleV
protected final int _numDocsPerChunk;
protected final int _numChunks;
protected final int _lengthOfLongestEntry;
private final int _version;
Contributor:
nice.

I would also introduce a private final _headerEntryChunkOffsetSize here, and initialize it by calling a method getHeaderEntryChunkOffsetSize(version) in the writer.

Contributor Author:
Done.

@mcvsubbu , this actually comes in handy right now, since I haven't bumped the version of the fixed-byte chunk writer. It is still on version 2 and uses 4-byte chunk offset entries in the file header. So the current changes preserve v1/v2 compatibility for var-byte, read/write the new var-byte format in v3, and still continue to read/write fixed-byte indexes in v1/v2.

I have mixed opinions on bumping up the version of the fixed-byte chunk writer to use 8-byte offsets as well. The thing is, if we don't bump it up now and the fixed-byte file format changes tomorrow (for some reason), we will bump it up to 3. At that point it will automatically get 8-byte offsets by virtue of being at version >= 3. So maybe do it now and keep the versions the same.

The flip side is that you would ideally want to evolve the fixed-byte and var-byte formats independently (which is what this PR does by keeping the fixed-byte writer at version 2). Obviously, if we separate out the base class and duplicate the code, things get simpler, but that's not the best option. Thoughts?

Contributor:
Fixed-byte and var-byte formats cannot evolve independently unless we split the base class like you said. Some duplication can be avoided, but in the end, the version number at the top should decide what the underlying format is.

I guess the downside of doing this for fixed-byte will be that storage will (almost) double for the fixed-byte no-dictionary columns?

Contributor:
No, my bad. It will only double the offset per chunk, so it should be ok. Let us just make it 8 bytes for all, like we discussed.

Contributor Author:
Discussed offline. It is better to keep the version/format the same, so we will use 8-byte chunk offsets for fixed-byte indexes as well.

Storage overhead: consider a segment with 10 million rows. Since we currently pack 1000 rows into a fixed-byte chunk, there will be 10,000 chunks. If the file header has 8-byte chunk offsets instead of 4-byte ones, the storage overhead for that column's raw forward index goes up by 40KB (10,000 chunks * 4 extra bytes). Extrapolating to 1000 segments on a server with roughly 5 fixed-width no-dictionary columns per segment, we are looking at 40KB * 1000 * 5 = 200MB.

Will make the changes
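
As a sanity check on the arithmetic above, here is a small, self-contained sketch (the row, segment, and column counts are just the example figures from this comment, not measurements; the class name is only for illustration):

public class HeaderOverheadEstimate {
  public static void main(String[] args) {
    long rowsPerSegment = 10_000_000L;
    int docsPerChunk = 1_000;                              // fixed-byte chunks currently pack 1000 rows
    long chunksPerColumn = rowsPerSegment / docsPerChunk;  // 10,000 chunks
    int extraBytesPerOffset = Long.BYTES - Integer.BYTES;  // 8-byte vs 4-byte header entry
    long extraBytesPerColumn = chunksPerColumn * extraBytesPerOffset;  // 40,000 bytes, ~40KB
    long segmentsPerServer = 1_000;
    long rawColumnsPerSegment = 5;
    long extraBytesPerServer = extraBytesPerColumn * segmentsPerServer * rawColumnsPerSegment;
    System.out.println(extraBytesPerServer);               // 200,000,000 bytes, ~200MB
  }
}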

Contributor Author:
Made the changes as discussed
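
For reference, a minimal sketch of the version-based dispatch settled on above, assuming a plain ByteBuffer view over the header's chunk-offset table. The helper name getHeaderEntryChunkOffsetSize follows the suggestion earlier in this thread and may differ from the final code:

import java.nio.ByteBuffer;

final class ChunkOffsetSketch {
  private ChunkOffsetSketch() {
  }

  // v1/v2 files store 4-byte chunk offsets in the file header; v3 and later store 8-byte offsets.
  static int getHeaderEntryChunkOffsetSize(int version) {
    return (version < 3) ? Integer.BYTES : Long.BYTES;
  }

  // Reads the start offset of the given chunk from the header slice, using the entry width
  // that matches the file's version.
  static long getChunkPosition(ByteBuffer dataHeader, int chunkId, int version) {
    int entrySize = getHeaderEntryChunkOffsetSize(version);
    return (entrySize == Integer.BYTES)
        ? dataHeader.getInt(chunkId * entrySize)
        : dataHeader.getLong(chunkId * entrySize);
  }
}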


/**
* Constructor for the class.
@@ -57,7 +59,7 @@ public BaseChunkSingleValueReader(PinotDataBuffer pinotDataBuffer) {
_dataBuffer = pinotDataBuffer;

int headerOffset = 0;
int version = _dataBuffer.getInt(headerOffset);
_version = _dataBuffer.getInt(headerOffset);
headerOffset += Integer.BYTES;

_numChunks = _dataBuffer.getInt(headerOffset);
@@ -70,7 +72,7 @@ public BaseChunkSingleValueReader(PinotDataBuffer pinotDataBuffer) {
headerOffset += Integer.BYTES;

int dataHeaderStart = headerOffset;
if (version > 1) {
if (_version > 1) {
_dataBuffer.getInt(headerOffset); // Total docs
headerOffset += Integer.BYTES;

@@ -89,7 +91,7 @@ public BaseChunkSingleValueReader(PinotDataBuffer pinotDataBuffer) {
_chunkSize = (_lengthOfLongestEntry * _numDocsPerChunk);

// Slice out the header from the data buffer.
int dataHeaderLength = _numChunks * Integer.BYTES;
int dataHeaderLength = _numChunks * VarByteChunkSingleValueWriter.FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE;
int rawDataStart = dataHeaderStart + dataHeaderLength;
_dataHeader = _dataBuffer.view(dataHeaderStart, rawDataStart);

@@ -120,14 +122,14 @@ protected ByteBuffer getChunkForRow(int row, ChunkReaderContext context) {
}

int chunkSize;
int chunkPosition = getChunkPosition(chunkId);
long chunkPosition = getChunkPosition(chunkId);

// Size of chunk can be determined using next chunks offset, or end of data buffer for last chunk.
if (chunkId == (_numChunks - 1)) { // Last chunk.
chunkSize = (int) (_dataBuffer.size() - chunkPosition);
} else {
int nextChunkOffset = getChunkPosition(chunkId + 1);
chunkSize = nextChunkOffset - chunkPosition;
long nextChunkOffset = getChunkPosition(chunkId + 1);
chunkSize = (int)(nextChunkOffset - chunkPosition);
}

ByteBuffer decompressedBuffer = context.getChunkBuffer();
@@ -149,8 +151,11 @@ protected ByteBuffer getChunkForRow(int row, ChunkReaderContext context) {
* @param chunkId Id of the chunk for which to return the position.
* @return Position (offset) of the chunk in the data.
*/
protected int getChunkPosition(int chunkId) {
return _dataHeader.getInt(chunkId * Integer.BYTES);
protected long getChunkPosition(int chunkId) {
if (_version < 3) {
return _dataHeader.getInt(chunkId * VarByteChunkSingleValueWriter.FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE_V1V2);
}
return _dataHeader.getLong(chunkId * VarByteChunkSingleValueWriter.FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE);
}

/**
@@ -55,7 +55,7 @@ public String getString(int row, ChunkReaderContext context) {
int chunkRowId = row % _numDocsPerChunk;
ByteBuffer chunkBuffer = getChunkForRow(row, context);

int rowOffset = chunkBuffer.getInt(chunkRowId * Integer.BYTES);
int rowOffset = chunkBuffer.getInt(chunkRowId * VarByteChunkSingleValueWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE);
int nextRowOffset = getNextRowOffset(chunkRowId, chunkBuffer);

int length = nextRowOffset - rowOffset;
@@ -77,7 +77,7 @@ public byte[] getBytes(int row, ChunkReaderContext context) {
int chunkRowId = row % _numDocsPerChunk;
ByteBuffer chunkBuffer = getChunkForRow(row, context);

int rowOffset = chunkBuffer.getInt(chunkRowId * Integer.BYTES);
int rowOffset = chunkBuffer.getInt(chunkRowId * VarByteChunkSingleValueWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE);
int nextRowOffset = getNextRowOffset(chunkRowId, chunkBuffer);

int length = nextRowOffset - rowOffset;
@@ -109,7 +109,7 @@ private int getNextRowOffset(int currentRowId, ByteBuffer chunkBuffer) {
// Last row in this trunk.
nextRowOffset = chunkBuffer.limit();
} else {
nextRowOffset = chunkBuffer.getInt((currentRowId + 1) * Integer.BYTES);
nextRowOffset = chunkBuffer.getInt((currentRowId + 1) * VarByteChunkSingleValueWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE);
// For incomplete chunks, the next string's offset will be 0 as row offset for absent rows are 0.
if (nextRowOffset == 0) {
nextRowOffset = chunkBuffer.limit();
@@ -37,6 +37,8 @@
*/
public abstract class BaseChunkSingleValueWriter implements SingleColumnSingleValueWriter {
private static final Logger LOGGER = LoggerFactory.getLogger(BaseChunkSingleValueWriter.class);
public static final int FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE_V1V2 = Integer.BYTES;
public static final int FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE = Long.BYTES;
Contributor:
Suggested change:
(original)  public static final int FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE = Long.BYTES;
(suggested) public static final int FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE_V3 = Long.BYTES;

Contributor Author:
done


protected final FileChannel _dataFile;
protected ByteBuffer _header;
@@ -45,7 +47,7 @@ public abstract class BaseChunkSingleValueWriter implements SingleColumnSingleVa
protected final ChunkCompressor _chunkCompressor;

protected int _chunkSize;
protected int _dataOffset;
protected long _dataOffset;
Contributor:
Add another final int _headerEntryChunkOffsetSize here, determined based on version

Contributor Author:
done


/**
* Constructor for the class.
@@ -139,7 +141,8 @@ public void close()
private int writeHeader(ChunkCompressorFactory.CompressionType compressionType, int totalDocs, int numDocsPerChunk,
int sizeOfEntry, int version) {
int numChunks = (totalDocs + numDocsPerChunk - 1) / numDocsPerChunk;
int headerSize = (numChunks + 7) * Integer.BYTES; // 7 items written before chunk indexing.
// 7 items written before chunk indexing.
int headerSize = (7 * Integer.BYTES) + (numChunks * VarByteChunkSingleValueWriter.FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE);
Contributor:
This should be based on the version passed in to the writer. Yes, we use only version 2 now, but let us keep the versioning clean. The version is already there in the constructor, so use it. It will help if we ever want to select a different version in the writer for whatever reason.

Contributor Author:
I agree. done
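
A rough sketch of what the version-driven sizing could look like (the "7 items" are the fixed int fields the writer emits before the chunk-offset table); this is illustrative only and not the exact code in the PR:

final class HeaderSizeSketch {
  private HeaderSizeSketch() {
  }

  // Total header size: 7 fixed int fields, followed by one chunk-offset entry per chunk
  // whose width depends on the format version passed to the writer.
  static int headerSize(int totalDocs, int numDocsPerChunk, int version) {
    int numChunks = (totalDocs + numDocsPerChunk - 1) / numDocsPerChunk;
    int offsetEntrySize = (version < 3) ? Integer.BYTES : Long.BYTES;
    return (7 * Integer.BYTES) + (numChunks * offsetEntrySize);
  }
}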


_header = ByteBuffer.allocateDirect(headerSize);

@@ -196,7 +199,7 @@ protected void writeChunk() {
throw new RuntimeException(e);
}

_header.putInt(_dataOffset);
_header.putLong(_dataOffset);
_dataOffset += sizeToWrite;

_chunkBuffer.clear();
@@ -49,7 +49,7 @@
*/
@NotThreadSafe
public class VarByteChunkSingleValueWriter extends BaseChunkSingleValueWriter {
private static final int CURRENT_VERSION = 2;
private static final int CURRENT_VERSION = 3;
public static final int CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE = Integer.BYTES;

private final int _chunkHeaderSize;
@@ -43,7 +43,7 @@ static PinotNativeOrderLBuffer loadFile(File file, long offset, long size)
return buffer;
}

static PinotNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
public static PinotNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
throws IOException {
if (readOnly) {
return new PinotNativeOrderLBuffer(new MMapBuffer(file, offset, size, MMapMode.READ_ONLY), true, false);
@@ -43,7 +43,7 @@ static PinotNonNativeOrderLBuffer loadFile(File file, long offset, long size)
return buffer;
}

static PinotNonNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
public static PinotNonNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
throws IOException {
if (readOnly) {
return new PinotNonNativeOrderLBuffer(new MMapBuffer(file, offset, size, MMapMode.READ_ONLY), true, false);
@@ -21,16 +21,20 @@
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.pinot.common.utils.StringUtil;
import org.apache.pinot.core.io.compression.ChunkCompressorFactory;
import org.apache.pinot.core.io.reader.impl.ChunkReaderContext;
import org.apache.pinot.core.io.reader.impl.v1.VarByteChunkSingleValueReader;
import org.apache.pinot.core.io.writer.impl.v1.VarByteChunkSingleValueWriter;
import org.apache.pinot.core.segment.creator.impl.fwd.SingleValueVarByteRawIndexCreator;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.core.segment.memory.PinotNativeOrderLBuffer;
import org.apache.pinot.core.segment.memory.PinotNonNativeOrderLBuffer;
import org.testng.Assert;
import org.testng.annotations.Test;

@@ -113,27 +117,37 @@ public void test(ChunkCompressorFactory.CompressionType compressionType)
* @throws IOException
*/
@Test
public void testBackwardCompatibility()
throws IOException {
public void testBackwardCompatibilityV1()
throws Exception {
String[] expected = new String[]{"abcde", "fgh", "ijklmn", "12345"};
testBackwardCompatibilityHelper("data/varByteStrings.v1", expected, 1009);
}

// Get v1 from resources folder
/**
* This test ensures that the reader can read in a data file from version 2.
*/
@Test
public void testBackwardCompatibilityV2()
throws Exception {
String[] data = {"abcdefghijk", "12456887", "pqrstuv", "500"};
testBackwardCompatibilityHelper("data/varByteStringsCompressed.v2", data, 1000);
Contributor:
Nice. Do we also want to add a v1 raw data file in the tests?

testBackwardCompatibilityHelper("data/varByteStringsRaw.v2", data, 1000);
}

private void testBackwardCompatibilityHelper(String fileName, String[] data, int numDocs)
throws Exception {
ClassLoader classLoader = getClass().getClassLoader();
String fileName = "data/varByteStrings.v1";
URL resource = classLoader.getResource(fileName);
if (resource == null) {
throw new RuntimeException("Input file not found: " + fileName);
}

File file = new File(resource.getFile());
try (VarByteChunkSingleValueReader reader = new VarByteChunkSingleValueReader(
PinotDataBuffer.mapReadOnlyBigEndianFile(file))) {
ChunkReaderContext context = reader.createContext();

int numEntries = 1009; // Number of entries in the input file.
for (int i = 0; i < numEntries; i++) {
for (int i = 0; i < numDocs; i++) {
String actual = reader.getString(i, context);
Assert.assertEquals(actual, expected[i % expected.length]);
Assert.assertEquals(actual, data[i % data.length]);
}
}
}
@@ -173,7 +187,7 @@ private void testLargeVarcharHelper(ChunkCompressorFactory.CompressionType compr
int maxStringLengthInBytes = 0;
for (int i = 0; i < numDocs; i++) {
expected[i] = RandomStringUtils.random(random.nextInt(numChars));
maxStringLengthInBytes = Math.max(maxStringLengthInBytes, expected[i].getBytes(UTF_8).length);
maxStringLengthInBytes = Math.max(maxStringLengthInBytes, StringUtil.encodeUtf8(expected[i]).length);
}

int numDocsPerChunk = SingleValueVarByteRawIndexCreator.getNumDocsPerChunk(maxStringLengthInBytes);
@@ -183,20 +197,44 @@

for (int i = 0; i < numDocs; i += 2) {
writer.setString(i, expected[i]);
writer.setBytes(i + 1, expected[i].getBytes(UTF_8));
writer.setBytes(i + 1, StringUtil.encodeUtf8(expected[i]));
}

writer.close();

try (VarByteChunkSingleValueReader reader = new VarByteChunkSingleValueReader(
PinotDataBuffer.mapReadOnlyBigEndianFile(outFile))) {
PinotDataBuffer buffer = PinotDataBuffer.mapReadOnlyBigEndianFile(outFile);
try (VarByteChunkSingleValueReader reader = new VarByteChunkSingleValueReader(buffer)) {
ChunkReaderContext context = reader.createContext();
for (int i = 0; i < numDocs; i += 2) {
String actual = reader.getString(i, context);
Assert.assertEquals(actual, expected[i]);
byte[] expectedBytes = StringUtil.encodeUtf8(expected[i]);
Assert.assertEquals(StringUtil.encodeUtf8(actual), expectedBytes);
Assert.assertEquals(reader.getBytes(i + 1, context), expectedBytes);
}
}

// For large variable width column values (where total size of data
// across all rows in the segment is > 2GB), LBuffer will be used for
// reading the fwd index. However, to test this scenario the unit test
// will take a long time to execute due to comparison
// (75000 characters in each row and 10000 rows will hit this scenario).
// So we specifically test for mapping the index file into a LBuffer
// to exercise the LBuffer code
if (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN) {
buffer = PinotNativeOrderLBuffer.mapFile(outFile, true, 0, outFile.length());
} else {
buffer = PinotNonNativeOrderLBuffer.mapFile(outFile, true, 0, outFile.length());
}

try (VarByteChunkSingleValueReader reader = new VarByteChunkSingleValueReader(buffer)) {
ChunkReaderContext context = reader.createContext();
for (int i = 0; i < numDocs; i += 2) {
String actual = reader.getString(i, context);
Assert.assertEquals(actual, expected[i]);
Assert.assertEquals(actual.getBytes(UTF_8), expected[i].getBytes(UTF_8));
Assert.assertEquals(reader.getBytes(i + 1), expected[i].getBytes(UTF_8));
byte[] expectedBytes = StringUtil.encodeUtf8(expected[i]);
Assert.assertEquals(StringUtil.encodeUtf8(actual), expectedBytes);
Assert.assertEquals(reader.getBytes(i + 1, context), expectedBytes);
}
}

Binary file not shown.
Binary file not shown.