Skip to content

Commit

Permalink
HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of t…
Browse files Browse the repository at this point in the history
…he data

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
      Cleanup trace message and include offset; makes debugging easier.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
      Fix incorrect data member javadoc.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
      Pass along the offset we are checksumming at.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
      Add trace logging for debugging and set the end of the prefetch to be
      last datablock, not size minus trailersize (there are the root indices
      and file info to be skipped)
  • Loading branch information
saintstack committed Feb 10, 2016
1 parent 7cab247 commit b6328eb
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ public class ChecksumUtil {
/** This is used to reserve space in a byte buffer */
private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];

/**
* This is used by unit tests to make checksum failures throw an
* exception instead of returning null. Returning a null value from
* checksum validation will cause the higher layer to retry that
* read with hdfs-level checksums. Instead, we would like checksum
/**
* This is used by unit tests to make checksum failures throw an
* exception instead of returning null. Returning a null value from
* checksum validation will cause the higher layer to retry that
* read with hdfs-level checksums. Instead, we would like checksum
* failures to cause the entire unit test to fail.
*/
private static boolean generateExceptions = false;
Expand Down Expand Up @@ -86,7 +86,7 @@ static void generateChecksums(byte[] indata, int startOffset, int endOffset,
* The header is extracted from the specified HFileBlock while the
* data-to-be-verified is extracted from 'data'.
*/
static boolean validateBlockChecksum(String pathName, HFileBlock block,
static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
byte[] data, int hdrSize) throws IOException {

// If this is an older version of the block that does not have
Expand All @@ -100,7 +100,7 @@ static boolean validateBlockChecksum(String pathName, HFileBlock block,
}

// Get a checksum object based on the type of checksum that is
// set in the HFileBlock header. A ChecksumType.NULL indicates that
// set in the HFileBlock header. A ChecksumType.NULL indicates that
// the caller is not interested in validating checksums, so we
// always return true.
ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
Expand All @@ -116,12 +116,13 @@ static boolean validateBlockChecksum(String pathName, HFileBlock block,
assert dataChecksum != null;
int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
if (LOG.isTraceEnabled()) {
LOG.info("length of data = " + data.length
+ " OnDiskDataSizeWithHeader = " + sizeWithHeader
+ " checksum type = " + cktype.getName()
+ " file =" + pathName
+ " header size = " + hdrSize
+ " bytesPerChecksum = " + bytesPerChecksum);
LOG.info("dataLength=" + data.length
+ ", sizeWithHeader=" + sizeWithHeader
+ ", checksumType=" + cktype.getName()
+ ", file=" + pathName
+ ", offset=" + offset
+ ", headerSize=" + hdrSize
+ ", bytesPerChecksum=" + bytesPerChecksum);
}
try {
dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
Expand All @@ -140,7 +141,7 @@ static boolean validateBlockChecksum(String pathName, HFileBlock block,
* @return The number of bytes needed to store the checksum values
*/
static long numBytes(long datasize, int bytesPerChecksum) {
return numChunks(datasize, bytesPerChecksum) *
return numChunks(datasize, bytesPerChecksum) *
HFileBlock.CHECKSUM_SIZE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,14 @@
* trailer size is fixed within a given {@link HFile} format version only, but
* we always store the version number as the last four-byte integer of the file.
* The version number itself is split into two portions, a major
* version and a minor version.
* The last three bytes of a file is the major
* version and a minor version. The last three bytes of a file are the major
* version and a single preceding byte is the minor number. The major version
* determines which readers/writers to use to read/write a hfile while a minor
* version determines smaller changes in hfile format that do not need a new
* reader/writer type.
*/
@InterfaceAudience.Private
public class FixedFileTrailer {

/**
* We store the comparator class name as a fixed-length field in the trailer.
*/
Expand All @@ -67,8 +65,9 @@ public class FixedFileTrailer {
/**
* In version 1, the offset to the data block index. Starting from version 2,
* the meaning of this field is the offset to the section of the file that
* should be loaded at the time the file is being opened, and as of the time
* of writing, this happens to be the offset of the file info section.
* should be loaded at the time the file is being opened: i.e. on open we load
* the root index, file info, etc. See http://hbase.apache.org/book.html#_hfile_format_2
* in the reference guide.
*/
private long loadOnOpenDataOffset;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1694,7 +1694,7 @@ private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
b.assumeUncompressed();
}

if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, hdrSize)) {
return null; // checksum mismatch
}

Expand Down Expand Up @@ -1743,9 +1743,10 @@ public HFileBlockDecodingContext getDefaultBlockDecodingContext() {
* If there is a checksum mismatch, then return false. Otherwise
* return true.
*/
protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize)
throws IOException {
return ChecksumUtil.validateBlockChecksum(pathName, block, data, hdrSize);
protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
int hdrSize)
throws IOException {
return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,14 @@ public HFileReaderImpl(final Path path, FixedFileTrailer trailer,
if (cacheConf.shouldPrefetchOnOpen()) {
PrefetchExecutor.request(path, new Runnable() {
public void run() {
long offset = 0;
long end = 0;
try {
long offset = 0;
long end = fileSize - getTrailer().getTrailerSize();
end = getTrailer().getLoadOnOpenDataOffset();
HFileBlock prevBlock = null;
if (LOG.isTraceEnabled()) {
LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end);
}
while (offset < end) {
if (Thread.interrupted()) {
break;
Expand All @@ -273,11 +277,11 @@ public void run() {
} catch (IOException e) {
// IOExceptions are probably due to region closes (relocation, etc.)
if (LOG.isTraceEnabled()) {
LOG.trace("Exception encountered while prefetching " + path + ":", e);
LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
}
} catch (Exception e) {
// Other exceptions are interesting
LOG.warn("Exception encountered while prefetching " + path + ":", e);
LOG.warn("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
} finally {
PrefetchExecutor.complete(path);
}
Expand Down Expand Up @@ -1457,9 +1461,11 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
if (dataBlockIndexReader == null) {
throw new IOException("Block index not loaded");
}
if (dataBlockOffset < 0 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
long trailerOffset = trailer.getLoadOnOpenDataOffset();
if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
throw new IOException("Requested block is out of range: " + dataBlockOffset +
", lastDataBlockOffset: " + trailer.getLastDataBlockOffset());
", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
}
// For any given block from any given file, synchronize reads for said
// block.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ public FSReaderImplTest(FSDataInputStreamWrapper istream, long fileSize, FileSys
}

@Override
protected boolean validateBlockChecksum(HFileBlock block,
protected boolean validateBlockChecksum(HFileBlock block, long offset,
byte[] data, int hdrSize) throws IOException {
return false; // checksum validation failure
}
Expand Down

0 comments on commit b6328eb

Please sign in to comment.