Skip to content

Commit

Permalink
HBASE-28043 Reduce seeks from beginning of block in StoreFileScanner.…
Browse files Browse the repository at this point in the history
…seekToPreviousRow (#5373)

Signed-off-by: Bryan Beaudreault <bbeaudreault@apache.org>
Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
  • Loading branch information
jbewing authored and bbeaudreault committed Nov 9, 2023
1 parent a5af15e commit 91a5eae
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
addCommandDescriptor(MetaWriteTest.class, "metaWrite",
"Populate meta table;used with 1 thread; to be cleaned up by cleanMeta");
addCommandDescriptor(ScanTest.class, "scan", "Run scan test (read every row)");
addCommandDescriptor(ReverseScanTest.class, "reverseScan",
"Run reverse scan test (read every row)");
addCommandDescriptor(FilteredScanTest.class, "filterScan",
"Run scan test using a filter to find a specific row based on it's value "
+ "(make sure to use --rows=20)");
Expand Down Expand Up @@ -2096,6 +2098,49 @@ boolean testRow(final int i, final long startTime) throws IOException {
}
}

static class ReverseScanTest extends TableTest {
private ResultScanner testScanner;

ReverseScanTest(Connection con, TestOptions options, Status status) {
super(con, options, status);
}

@Override
void testTakedown() throws IOException {
if (this.testScanner != null) {
this.testScanner.close();
}
super.testTakedown();
}

@Override
boolean testRow(final int i, final long startTime) throws IOException {
if (this.testScanner == null) {
Scan scan = new Scan().setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
.setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType)
.setScanMetricsEnabled(true).setReversed(true);
for (int family = 0; family < opts.families; family++) {
byte[] familyName = Bytes.toBytes(FAMILY_NAME_BASE + family);
if (opts.addColumns) {
for (int column = 0; column < opts.columns; column++) {
byte[] qualifier = column == 0 ? COLUMN_ZERO : Bytes.toBytes("" + column);
scan.addColumn(familyName, qualifier);
}
} else {
scan.addFamily(familyName);
}
}
if (opts.filterAll) {
scan.setFilter(new FilterAllFilter());
}
this.testScanner = table.getScanner(scan);
}
Result r = testScanner.next();
updateValueSize(r);
return true;
}
}

/**
* Base class for operations that are CAS-like; that read a value and then set it based off what
* they read. In this category is increment, append, checkAndPut, etc.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.BloomFilterMetrics;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
Expand Down Expand Up @@ -146,7 +147,8 @@ public CellComparator getComparator() {
public StoreFileScanner getStoreFileScanner(boolean cacheBlocks, boolean pread,
boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
return new StoreFileScanner(this, getScanner(cacheBlocks, pread, isCompaction), !isCompaction,
reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn);
reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn,
reader.getDataBlockEncoding() == DataBlockEncoding.ROW_INDEX_V1);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ public class StoreFileScanner implements KeyValueScanner {
// A flag represents whether could stop skipping KeyValues for MVCC
// if have encountered the next row. Only used for reversed scan
private boolean stopSkippingKVsIfNextRow = false;
// A Cell that represents the row before the most previously seeked to row in seekToPreviousRow
private Cell previousRow = null;
// Whether the underlying HFile is using a data block encoding that has lower cost for seeking to
// a row from the beginning of a block (i.e. RIV1). If the data block encoding has a high cost for
// seeks, then we can use a modified reverse scanning algorithm to reduce seeks from the beginning
// of the block
private final boolean isFastSeekingEncoding;

private static LongAdder seekCount;

Expand All @@ -83,16 +90,21 @@ public class StoreFileScanner implements KeyValueScanner {
* {@link KeyValueScanner#getScannerOrder()}.
* @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
* otherwise {@code false}. This is a hint for optimization.
* @param isFastSeekingEncoding {@code true} if the data block encoding can seek quickly
* from the beginning of a block (i.e. RIV1), otherwise
* {@code false}. This is a hint for optimization.
*/
public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVCC,
boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn,
boolean isFastSeekingEncoding) {
this.readPt = readPt;
this.reader = reader;
this.hfs = hfs;
this.enforceMVCC = useMVCC;
this.hasMVCCInfo = hasMVCC;
this.scannerOrder = scannerOrder;
this.canOptimizeForNonNullColumn = canOptimizeForNonNullColumn;
this.isFastSeekingEncoding = isFastSeekingEncoding;
this.reader.incrementRefCount();
}

Expand Down Expand Up @@ -226,6 +238,7 @@ public boolean seek(Cell key) throws IOException {
}
} finally {
realSeekDone = true;
previousRow = null;
}
} catch (FileNotFoundException e) {
throw e;
Expand Down Expand Up @@ -253,6 +266,7 @@ public boolean reseek(Cell key) throws IOException {
}
} finally {
realSeekDone = true;
previousRow = null;
}
} catch (FileNotFoundException e) {
throw e;
Expand Down Expand Up @@ -486,50 +500,198 @@ public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS)
@Override
public boolean seekToPreviousRow(Cell originalKey) throws IOException {
try {
try {
boolean keepSeeking = false;
Cell key = originalKey;
do {
Cell seekKey = PrivateCellUtil.createFirstOnRow(key);
if (seekCount != null) seekCount.increment();
if (!hfs.seekBefore(seekKey)) {
this.cur = null;
return false;
}
Cell curCell = hfs.getCell();
Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(curCell);

if (seekCount != null) seekCount.increment();
if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
this.cur = null;
return false;
}

setCurrentCell(hfs.getCell());
this.stopSkippingKVsIfNextRow = true;
boolean resultOfSkipKVs;
try {
resultOfSkipKVs = skipKVsNewerThanReadpoint();
} finally {
this.stopSkippingKVsIfNextRow = false;
}
if (!resultOfSkipKVs || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0) {
keepSeeking = true;
key = firstKeyOfPreviousRow;
continue;
} else {
keepSeeking = false;
}
} while (keepSeeking);
return true;
} finally {
realSeekDone = true;
if (isFastSeekingEncoding) {
return seekToPreviousRowStateless(originalKey);
} else if (previousRow == null || getComparator().compareRows(previousRow, originalKey) > 0) {
return seekToPreviousRowWithoutHint(originalKey);
} else {
return seekToPreviousRowWithHint();
}
} catch (FileNotFoundException e) {
throw e;
} catch (IOException ioe) {
throw new IOException("Could not seekToPreviousRow " + this + " to key " + originalKey, ioe);
} finally {
this.realSeekDone = true;
}
}

/**
* This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires one seek
* and one reseek. This method maintains state in {@link StoreFileScanner#previousRow} which only
* makes sense in the context of a sequential row-by-row reverse scan.
* {@link StoreFileScanner#previousRow} should be reset if that is not the case. The reasoning for
* why this method is faster than {@link StoreFileScanner#seekToPreviousRowStateless(Cell)} is
* that seeks are slower as they need to start from the beginning of the file, while reseeks go
* forward from the current position.
*/
private boolean seekToPreviousRowWithHint() throws IOException {
do {
// Using our existing seek hint, set our next seek hint
Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(previousRow);
seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow);

// Reseek back to our initial seek hint (i.e. what we think is the start of the
// previous row)
if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
return false;
}

// If after skipping newer Kvs, we're still in our seek hint row, then we're finished
if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
return true;
}

// If the previousRow seek hint is missing, that means that we're at row after the first row
// in the storefile. Use the without-hint seek path to process the final row
if (previousRow == null) {
return seekToPreviousRowWithoutHint(firstKeyOfPreviousRow);
}

// Otherwise, use the previousRow seek hint to continue traversing backwards
} while (true);
}

/**
* This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks
* and one reseek. The extra expense/seek is with the intent of speeding up subsequent calls by
* using the {@link StoreFileScanner#seekToPreviousRowWithHint} which this method seeds the state
* for by setting {@link StoreFileScanner#previousRow}
*/
private boolean seekToPreviousRowWithoutHint(Cell originalKey) throws IOException {
// Rewind to the cell before the beginning of this row
Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(originalKey);
if (!seekBefore(keyAtBeginningOfRow)) {
return false;
}

// Rewind before this row and save what we find as a seek hint
Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow);

// Seek back to the start of the previous row
if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
return false;
}

// If after skipping newer Kvs, we're still in what we thought was the previous
// row, then we can exit
if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
return true;
}

// Skipping newer kvs resulted in skipping the entire row that we thought was the
// previous row. If we've set a seek hint, then we can use that to go backwards
// further
if (previousRow != null) {
return seekToPreviousRowWithHint();
}

// If we've made it here, then we weren't able to set a seek hint. This can happen
// only if we're at the beginning of the storefile i.e. there is no row before this
// one
return false;
}

/**
* This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks.
* It should be used if the cost for seeking is lower i.e. when using a fast seeking data block
* encoding like RIV1.
*/
private boolean seekToPreviousRowStateless(Cell originalKey) throws IOException {
Cell key = originalKey;
do {
Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(key);
if (!seekBefore(keyAtBeginningOfRow)) {
return false;
}

Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
if (!seekAtOrAfter(firstKeyOfPreviousRow)) {
return false;
}

if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
return true;
}
key = firstKeyOfPreviousRow;
} while (true);
}

private boolean seekBefore(Cell seekKey) throws IOException {
if (seekCount != null) {
seekCount.increment();
}
if (!hfs.seekBefore(seekKey)) {
this.cur = null;
return false;
}

return true;
}

/**
* Seeks before the seek target cell and saves the location to {@link #previousRow}. If there
* doesn't exist a KV in this file before the seek target cell, reposition the scanner at the
* beginning of the storefile (in preparation to a reseek at or after the seek key) and set the
* {@link #previousRow} to null. If {@link #previousRow} is ever non-null and then transitions to
* being null again via this method, that's because there doesn't exist a row before the seek
* target in the storefile (i.e. we're at the beginning of the storefile)
*/
private void seekBeforeAndSaveKeyToPreviousRow(Cell seekKey) throws IOException {
if (seekCount != null) {
seekCount.increment();
}
if (!hfs.seekBefore(seekKey)) {
// Since the above seek failed, we need to position ourselves back at the start of the
// block or else our reseek might fail. seekTo() cannot return false here as at least
// one seekBefore will have returned true by the time we get here
hfs.seekTo();
this.previousRow = null;
} else {
this.previousRow = hfs.getCell();
}
}

private boolean seekAtOrAfter(Cell seekKey) throws IOException {
if (seekCount != null) {
seekCount.increment();
}
if (!seekAtOrAfter(hfs, seekKey)) {
this.cur = null;
return false;
}

return true;
}

private boolean reseekAtOrAfter(Cell seekKey) throws IOException {
if (seekCount != null) {
seekCount.increment();
}
if (!reseekAtOrAfter(hfs, seekKey)) {
this.cur = null;
return false;
}

return true;
}

private boolean isStillAtSeekTargetAfterSkippingNewerKvs(Cell seekKey) throws IOException {
setCurrentCell(hfs.getCell());
return skipKvsNewerThanReadpointReversed() && getComparator().compareRows(cur, seekKey) <= 0;
}

private boolean skipKvsNewerThanReadpointReversed() throws IOException {
this.stopSkippingKVsIfNextRow = true;
boolean resultOfSkipKVs;
try {
resultOfSkipKVs = skipKVsNewerThanReadpoint();
} finally {
this.stopSkippingKVsIfNextRow = false;
}

return resultOfSkipKVs;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ public void testStoreFileReference() throws Exception {
StoreFileReader r = file.getReader();
assertNotNull(r);
StoreFileScanner scanner =
new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false);
new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);

// Verify after instantiating scanner refCount is increased
assertTrue("Verify file is being referenced", file.isReferencedInReads());
Expand All @@ -297,7 +297,7 @@ public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
try (StoreFileScanner scanner =
new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true)) {
new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
Scan scan = new Scan();
scan.setColumnFamilyTimeRange(cf, 0, 1);
assertFalse(scanner.shouldUseScanner(scan, store, 0));
Expand Down

0 comments on commit 91a5eae

Please sign in to comment.