Skip to content

Commit

Permalink
HBASE-13451 - Make the HFileBlockIndex blockKeys to Cells so that it …
Browse files Browse the repository at this point in the history
…could

be easy to use in the CellComparators (Ram)
  • Loading branch information
ramkrish86 committed Jun 9, 2015
1 parent c62b396 commit 487e4aa
Show file tree
Hide file tree
Showing 23 changed files with 441 additions and 315 deletions.
Expand Up @@ -770,8 +770,10 @@ public static String getCellKeyAsString(Cell cell) {
sb.append(KeyValue.humanReadableTimestamp(cell.getTimestamp())); sb.append(KeyValue.humanReadableTimestamp(cell.getTimestamp()));
sb.append('/'); sb.append('/');
sb.append(Type.codeToType(cell.getTypeByte())); sb.append(Type.codeToType(cell.getTypeByte()));
sb.append("/vlen="); if (!(cell instanceof KeyValue.KeyOnlyKeyValue)) {
sb.append(cell.getValueLength()); sb.append("/vlen=");
sb.append(cell.getValueLength());
}
sb.append("/seqid="); sb.append("/seqid=");
sb.append(cell.getSequenceId()); sb.append(cell.getSequenceId());
return sb.toString(); return sb.toString();
Expand Down
46 changes: 10 additions & 36 deletions hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
Expand Up @@ -2074,24 +2074,6 @@ public static byte[][] toBinaryByteArrays(final String[] t) {
return result; return result;
} }


/**
* Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR
*
* @param arr array of byte arrays to search for
* @param key the key you want to find
* @param offset the offset in the key you want to find
* @param length the length of the key
* @return zero-based index of the key, if the key is present in the array.
* Otherwise, a value -(i + 1) such that the key is between arr[i -
* 1] and arr[i] non-inclusively, where i is in [0, N], if we define
* arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
* means that this function can return 2N + 1 different values
* ranging from -(N + 1) to N - 1.
*/
public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
return binarySearch(arr, key, offset, length, (CellComparator) null);
}

/** /**
* Binary search for keys in indexes. * Binary search for keys in indexes.
* *
Expand All @@ -2111,41 +2093,35 @@ public static int binarySearch(byte[][] arr, byte[] key, int offset, int length)
@Deprecated @Deprecated
public static int binarySearch(byte [][]arr, byte []key, int offset, public static int binarySearch(byte [][]arr, byte []key, int offset,
int length, RawComparator<?> comparator) { int length, RawComparator<?> comparator) {
return binarySearch(arr, key, offset, length, (CellComparator)null); return binarySearch(arr, key, offset, length);
} }


/** /**
* Binary search for keys in indexes. * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR.
* *
* @param arr array of byte arrays to search for * @param arr array of byte arrays to search for
* @param key the key you want to find * @param key the key you want to find
* @param offset the offset in the key you want to find * @param offset the offset in the key you want to find
* @param length the length of the key * @param length the length of the key
* @param comparator a comparator to compare.
* @return zero-based index of the key, if the key is present in the array. * @return zero-based index of the key, if the key is present in the array.
* Otherwise, a value -(i + 1) such that the key is between arr[i - * Otherwise, a value -(i + 1) such that the key is between arr[i -
* 1] and arr[i] non-inclusively, where i is in [0, N], if we define * 1] and arr[i] non-inclusively, where i is in [0, N], if we define
* arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
* means that this function can return 2N + 1 different values * means that this function can return 2N + 1 different values
* ranging from -(N + 1) to N - 1. * ranging from -(N + 1) to N - 1.
*/ */
public static int binarySearch(byte [][]arr, byte []key, int offset, public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
int length, CellComparator comparator) {
int low = 0; int low = 0;
int high = arr.length - 1; int high = arr.length - 1;


KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue(); KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
r.setKey(key, offset, length); r.setKey(key, offset, length);
while (low <= high) { while (low <= high) {
int mid = (low+high) >>> 1; int mid = (low + high) >>> 1;
// we have to compare in this order, because the comparator order // we have to compare in this order, because the comparator order
// has special logic when the 'left side' is a special key. // has special logic when the 'left side' is a special key.
int cmp = 0; int cmp = Bytes.BYTES_RAWCOMPARATOR
if (comparator != null) { .compare(key, offset, length, arr[mid], 0, arr[mid].length);
cmp = comparator.compare(r, arr[mid], 0, arr[mid].length);
} else {
cmp = Bytes.BYTES_RAWCOMPARATOR.compare(key, offset, length, arr[mid], 0, arr[mid].length);
}
// key lives above the midpoint // key lives above the midpoint
if (cmp > 0) if (cmp > 0)
low = mid + 1; low = mid + 1;
Expand All @@ -2156,7 +2132,7 @@ else if (cmp < 0)
else else
return mid; return mid;
} }
return - (low+1); return -(low + 1);
} }


/** /**
Expand All @@ -2172,7 +2148,7 @@ else if (cmp < 0)
* means that this function can return 2N + 1 different values * means that this function can return 2N + 1 different values
* ranging from -(N + 1) to N - 1. * ranging from -(N + 1) to N - 1.
* @return the index of the block * @return the index of the block
* @deprecated Use {@link Bytes#binarySearch(byte[][], Cell, Comparator)} * @deprecated Use {@link Bytes#binarySearch(Cell[], Cell, CellComparator)}
*/ */
@Deprecated @Deprecated
public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) { public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) {
Expand Down Expand Up @@ -2212,16 +2188,14 @@ else if (cmp < 0)
* ranging from -(N + 1) to N - 1. * ranging from -(N + 1) to N - 1.
* @return the index of the block * @return the index of the block
*/ */
public static int binarySearch(byte[][] arr, Cell key, Comparator<Cell> comparator) { public static int binarySearch(Cell[] arr, Cell key, CellComparator comparator) {
int low = 0; int low = 0;
int high = arr.length - 1; int high = arr.length - 1;
KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
while (low <= high) { while (low <= high) {
int mid = (low+high) >>> 1; int mid = (low+high) >>> 1;
// we have to compare in this order, because the comparator order // we have to compare in this order, because the comparator order
// has special logic when the 'left side' is a special key. // has special logic when the 'left side' is a special key.
r.setKey(arr[mid], 0, arr[mid].length); int cmp = comparator.compare(key, arr[mid]);
int cmp = comparator.compare(key, r);
// key lives above the midpoint // key lives above the midpoint
if (cmp > 0) if (cmp > 0)
low = mid + 1; low = mid + 1;
Expand Down
Expand Up @@ -59,7 +59,7 @@ public class HalfStoreFileReader extends StoreFile.Reader {


protected final Cell splitCell; protected final Cell splitCell;


private byte[] firstKey = null; private Cell firstKey = null;


private boolean firstKeySeeked = false; private boolean firstKeySeeked = false;


Expand Down Expand Up @@ -262,7 +262,7 @@ public int reseekTo(Cell key) throws IOException {
@Override @Override
public boolean seekBefore(Cell key) throws IOException { public boolean seekBefore(Cell key) throws IOException {
if (top) { if (top) {
Cell fk = new KeyValue.KeyOnlyKeyValue(getFirstKey(), 0, getFirstKey().length); Cell fk = getFirstKey();
if (getComparator().compareKeyIgnoresMvcc(key, fk) <= 0) { if (getComparator().compareKeyIgnoresMvcc(key, fk) <= 0) {
return false; return false;
} }
Expand Down Expand Up @@ -319,18 +319,18 @@ public byte[] getLastKey() {
} }


@Override @Override
public byte[] midkey() throws IOException { public Cell midkey() throws IOException {
// Returns null to indicate file is not splittable. // Returns null to indicate file is not splittable.
return null; return null;
} }


@Override @Override
public byte[] getFirstKey() { public Cell getFirstKey() {
if (!firstKeySeeked) { if (!firstKeySeeked) {
HFileScanner scanner = getScanner(true, true, false); HFileScanner scanner = getScanner(true, true, false);
try { try {
if (scanner.seekTo()) { if (scanner.seekTo()) {
this.firstKey = Bytes.toBytes(scanner.getKey()); this.firstKey = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(scanner.getKey()));
} }
firstKeySeeked = true; firstKeySeeked = true;
} catch (IOException e) { } catch (IOException e) {
Expand Down
Expand Up @@ -17,7 +17,7 @@
* limitations under the License. * limitations under the License.
*/ */


package org.apache.hadoop.hbase.util; package org.apache.hadoop.hbase.io.hfile;


import java.io.DataInput; import java.io.DataInput;
import java.io.IOException; import java.io.IOException;
Expand All @@ -26,15 +26,15 @@
import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer; import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;


/** /**
* A Bloom filter implementation built on top of {@link BloomFilterChunk}, * A Bloom filter implementation built on top of
* encapsulating a set of fixed-size Bloom filters written out at the time of * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating
* a set of fixed-size Bloom filters written out at the time of
* {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
* block stream, and loaded on demand at query time. This class only provides * block stream, and loaded on demand at query time. This class only provides
* reading capabilities. * reading capabilities.
Expand Down Expand Up @@ -84,7 +84,11 @@ public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
throw new IllegalArgumentException("Invalid hash type: " + hashType); throw new IllegalArgumentException("Invalid hash type: " + hashType);
} }
// We will pass null for ROW block // We will pass null for ROW block
index = new HFileBlockIndex.BlockIndexReader(comparator, 1); if(comparator == null) {
index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
} else {
index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
}
index.readRootIndex(meta, numChunks); index.readRootIndex(meta, numChunks);
} }


Expand Down
Expand Up @@ -17,9 +17,10 @@
* limitations under the License. * limitations under the License.
*/ */


package org.apache.hadoop.hbase.util; package org.apache.hadoop.hbase.io.hfile;


import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.BloomFilterBase;


import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellComparator;


Expand Down Expand Up @@ -47,7 +48,7 @@ public class CompoundBloomFilterBase implements BloomFilterBase {
protected long totalByteSize; protected long totalByteSize;
protected long totalMaxKeys; protected long totalMaxKeys;


/** Hash function type to use, as defined in {@link Hash} */ /** Hash function type to use, as defined in {@link org.apache.hadoop.hbase.util.Hash} */
protected int hashType; protected int hashType;
/** Comparator used to compare Bloom filter keys */ /** Comparator used to compare Bloom filter keys */
protected CellComparator comparator; protected CellComparator comparator;
Expand Down
Expand Up @@ -16,7 +16,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.hadoop.hbase.util; package org.apache.hadoop.hbase.io.hfile;


import java.io.DataInput; import java.io.DataInput;
import java.io.DataOutput; import java.io.DataOutput;
Expand All @@ -29,9 +29,10 @@
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.util.BloomFilterChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex; import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.io.hfile.InlineBlockWriter; import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Writable;


/** /**
Expand Down
Expand Up @@ -394,13 +394,13 @@ public interface Reader extends Closeable, CachingBlockReader {


byte[] getLastKey(); byte[] getLastKey();


byte[] midkey() throws IOException; Cell midkey() throws IOException;


long length(); long length();


long getEntries(); long getEntries();


byte[] getFirstKey(); Cell getFirstKey();


long indexSize(); long indexSize();


Expand Down

0 comments on commit 487e4aa

Please sign in to comment.