apache · ramkrish86 · Nov 16, 2020 · Dec 1, 2020 · Dec 2, 2020 · Dec 2, 2020
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -327,6 +327,7 @@ protected static class HFileScannerImpl implements HFileScanner {
     private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue();
     // A pair for reusing in blockSeek() so that we don't garbage lot of objects
     final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>();
+    private boolean seekToSameBlock;
 
     /**
      * The next indexed key is to keep track of the indexed key of the next data block.
@@ -385,6 +386,11 @@ public boolean isSeeked(){
       return blockBuffer != null;
     }
 
+    @Override
+    public boolean isSeekToSameBlock() {
+      return this.seekToSameBlock; // maintained by loadBlockAndSeekToKey()
+    }
+
     @Override
     public String toString() {
       return "HFileScanner for reader " + String.valueOf(getReader());
@@ -981,8 +987,11 @@ protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
         Cell key, boolean seekBefore) throws IOException {
       if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
         updateCurrentBlock(seekToBlock);
+        seekToSameBlock = false; // moved to a different block
       } else if (rewind) {
         blockBuffer.rewind();
+      } else {
+        seekToSameBlock = true; // seek key is in the block we are already positioned on
       }
       // Update the nextIndexedKey
       this.nextIndexedKey = nextIndexedKey;

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
@@ -163,6 +163,13 @@ public interface HFileScanner extends Shipper, Closeable {
    */
   Cell getNextIndexedKey();
 
+  /**
+   * @return true if the most recent seek stayed in the already-current block; false by default
+   */
+  default boolean isSeekToSameBlock() {
+    return false;
+  }
+
   /**
    * Close this HFile scanner and do necessary cleanup.
    */

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
@@ -431,6 +431,11 @@ public Cell getNextIndexedKey() {
     return current == null ? null : current.getNextIndexedKey();
   }
 
+  @Override
+  public boolean isSeekToSameBlock() {
+    return current != null && current.isSeekToSameBlock();
+  }
+
   @Override
   public void shipped() throws IOException {
     for (KeyValueScanner scanner : this.scannersForDelayedClose) {

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
@@ -181,4 +181,11 @@ boolean requestSeek(Cell kv, boolean forward, boolean useBloom)
    * see HFileWriterImpl#getMidpoint, or null if not known.
    */
   public Cell getNextIndexedKey();
+
+  /**
+   * @return true if the most recent seek stayed in the already-current block; false by default
+   */
+  public default boolean isSeekToSameBlock() {
+    return false;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
@@ -575,4 +575,9 @@ public Cell getNextIndexedKey() {
   public void shipped() throws IOException {
     this.hfs.shipped();
   }
+
+  @Override
+  public boolean isSeekToSameBlock() {
+    return this.hfs.isSeekToSameBlock();
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
@@ -113,7 +113,9 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
   private Cell prevCell = null;
 
   private final long preadMaxBytes;
+  private final long switchToNextOnlyBytes;
   private long bytesRead;
+  private boolean seekToSameBlock;
 
   /** We don't ever expect to change this, the constant is just for clarity. */
   static final boolean LAZY_SEEK_ENABLED_BY_DEFAULT = true;
@@ -205,6 +207,9 @@ private StoreScanner(HStore store, Scan scan, ScanInfo scanInfo,
       this.scanUsePread = this.readType != Scan.ReadType.STREAM;
     }
     this.preadMaxBytes = scanInfo.getPreadMaxBytes();
+    // TODO : Introduce a dedicated config at the ScanInfo level; for now the pread threshold is
+    // reused. Should this be based on the number of blocks rather than bytes read?
+    this.switchToNextOnlyBytes = this.preadMaxBytes;
     this.cellsPerHeartbeatCheck = scanInfo.getCellsPerTimeoutCheck();
     // Parallel seeking is on if the config allows and more there is more than one store file.
     if (store != null && store.getStorefilesCount() > 1) {
@@ -684,7 +689,7 @@ public boolean next(List<Cell> outResult, ScannerContext scannerContext) throws
               matcher.clearCurrentRow();
               seekOrSkipToNextRow(cell);
             } else if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL) {
-              seekOrSkipToNextColumn(cell);
+              doSeekCol(cell);
             } else {
               this.heap.next();
             }
@@ -728,7 +733,7 @@ public boolean next(List<Cell> outResult, ScannerContext scannerContext) throws
             break;
 
           case SEEK_NEXT_COL:
-            seekOrSkipToNextColumn(cell);
+            doSeekCol(cell);
             NextState stateAfterSeekNextColumn = needToReturn(outResult);
             if (stateAfterSeekNextColumn != null) {
               return scannerContext.setScannerState(stateAfterSeekNextColumn).hasMoreValues();
@@ -786,6 +791,19 @@ private void updateMetricsStore(boolean memstoreRead) {
     }
   }
 
+  private void doSeekCol(Cell cell) throws IOException {
+    // 'seekToSameBlock' is refreshed in 'seekOrSkipToNextColumn' and tells us whether a
+    // seek-next-col seek landed in the block we were already on. When that was the case and
+    // more than 'switchToNextOnlyBytes' bytes have been read, we stop seeking and simply
+    // advance with next(); otherwise we take the regular seek-or-skip path.
+    boolean preferNext = seekToSameBlock && bytesRead > switchToNextOnlyBytes;
+    if (!preferNext) {
+      seekOrSkipToNextColumn(cell);
+      return;
+    }
+    this.heap.next();
+  }
+
   /**
    * If the top cell won't be flushed into disk, the new top cell may be
    * changed after #reopenAfterFlush. Because the older top cell only exist
@@ -817,7 +835,13 @@ private void seekOrSkipToNextRow(Cell cell) throws IOException {
 
   private void seekOrSkipToNextColumn(Cell cell) throws IOException {
     if (!trySkipToNextColumn(cell)) {
+      final boolean noIndexedKeyBeforeSeek = getNextIndexedKey() == null;
       seekAsDirection(matcher.getKeyForNextColumn(cell));
+      if (noIndexedKeyBeforeSeek) {
+        // Re-read the flag after the seek so that a single seek which led to another block
+        // resets it to false. TODO : For SEEK_NEXT_ROW also?
+        seekToSameBlock = this.heap.isSeekToSameBlock();
+      }
     }
   }
 
@@ -1235,6 +1259,11 @@ public Cell getNextIndexedKey() {
     return this.heap.getNextIndexedKey();
   }
 
+  @Override
+  public boolean isSeekToSameBlock() {
+    return this.heap.isSeekToSameBlock(); // delegate to the heap
+  }
+
   @Override
   public void shipped() throws IOException {
     if (prevCell != null) {