HBASE-13451 - Make the HFileBlockIndex blockKeys to Cells so that it could be easy to use in the CellComparators (Ram)
apache · Jun 9, 2015 · 487e4aa · 487e4aa
1 parent c62b396
commit 487e4aa
Show file tree

Hide file tree

Showing 23 changed files with 441 additions and 315 deletions.
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
@@ -770,8 +770,10 @@ public static String getCellKeyAsString(Cell cell) {
     sb.append(KeyValue.humanReadableTimestamp(cell.getTimestamp()));
     sb.append('/');
     sb.append(Type.codeToType(cell.getTypeByte()));
-    sb.append("/vlen=");
+    if (!(cell instanceof KeyValue.KeyOnlyKeyValue)) {
-    sb.append(cell.getValueLength());
+      sb.append("/vlen=");
+      sb.append(cell.getValueLength());
+    }
     sb.append("/seqid=");
     sb.append(cell.getSequenceId());
     return sb.toString();

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -2074,24 +2074,6 @@ public static byte[][] toBinaryByteArrays(final String[] t) {
     return result;
   }
 
-  /**
-   * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR
-   *
-   * @param arr array of byte arrays to search for
-   * @param key the key you want to find
-   * @param offset the offset in the key you want to find
-   * @param length the length of the key
-   * @return zero-based index of the key, if the key is present in the array.
-   *         Otherwise, a value -(i + 1) such that the key is between arr[i -
-   *         1] and arr[i] non-inclusively, where i is in [0, i], if we define
-   *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
-   *         means that this function can return 2N + 1 different values
-   *         ranging from -(N + 1) to N - 1.
-   */
-  public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
-    return binarySearch(arr, key, offset, length, (CellComparator) null);
-  }
-
   /**
    * Binary search for keys in indexes.
    *
@@ -2111,41 +2093,35 @@ public static int binarySearch(byte[][] arr, byte[] key, int offset, int length)
   @Deprecated
   public static int binarySearch(byte [][]arr, byte []key, int offset,
       int length, RawComparator<?> comparator) {
-    return binarySearch(arr, key, offset, length, (CellComparator)null);
+    return binarySearch(arr, key, offset, length);
   }
 
   /**
-   * Binary search for keys in indexes.
+   * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR.
    *
    * @param arr array of byte arrays to search for
    * @param key the key you want to find
    * @param offset the offset in the key you want to find
    * @param length the length of the key
-   * @param comparator a comparator to compare.
    * @return zero-based index of the key, if the key is present in the array.
    *         Otherwise, a value -(i + 1) such that the key is between arr[i -
    *         1] and arr[i] non-inclusively, where i is in [0, i], if we define
    *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
    *         means that this function can return 2N + 1 different values
    *         ranging from -(N + 1) to N - 1.
    */
-  public static int binarySearch(byte [][]arr, byte []key, int offset,
+  public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
-      int length, CellComparator comparator) {
     int low = 0;
     int high = arr.length - 1;
 
     KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
     r.setKey(key, offset, length);
     while (low <= high) {
-      int mid = (low+high) >>> 1;
+      int mid = (low + high) >>> 1;
       // we have to compare in this order, because the comparator order
       // has special logic when the 'left side' is a special key.
-      int cmp = 0;
+      int cmp = Bytes.BYTES_RAWCOMPARATOR
-      if (comparator != null) {
+          .compare(key, offset, length, arr[mid], 0, arr[mid].length);
-        cmp = comparator.compare(r, arr[mid], 0, arr[mid].length);
-      } else {
-        cmp = Bytes.BYTES_RAWCOMPARATOR.compare(key, offset, length, arr[mid], 0, arr[mid].length);
-      }
       // key lives above the midpoint
       if (cmp > 0)
         low = mid + 1;
@@ -2156,7 +2132,7 @@ else if (cmp < 0)
       else
         return mid;
     }
-    return - (low+1);
+    return -(low + 1);
   }
 
   /**
@@ -2172,7 +2148,7 @@ else if (cmp < 0)
    *         means that this function can return 2N + 1 different values
    *         ranging from -(N + 1) to N - 1.
    * @return the index of the block
-   * @deprecated Use {@link Bytes#binarySearch(byte[][], Cell, Comparator)}
+   * @deprecated Use {@link Bytes#binarySearch(Cell[], Cell, CellComparator)}
    */
   @Deprecated
   public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) {
@@ -2212,16 +2188,14 @@ else if (cmp < 0)
    *         ranging from -(N + 1) to N - 1.
    * @return the index of the block
    */
-  public static int binarySearch(byte[][] arr, Cell key, Comparator<Cell> comparator) {
+  public static int binarySearch(Cell[] arr, Cell key, CellComparator comparator) {
     int low = 0;
     int high = arr.length - 1;
-    KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
     while (low <= high) {
       int mid = (low+high) >>> 1;
       // we have to compare in this order, because the comparator order
       // has special logic when the 'left side' is a special key.
-      r.setKey(arr[mid], 0, arr[mid].length);
+      int cmp = comparator.compare(key, arr[mid]);
-      int cmp = comparator.compare(key, r);
       // key lives above the midpoint
       if (cmp > 0)
         low = mid + 1;

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
@@ -59,7 +59,7 @@ public class HalfStoreFileReader extends StoreFile.Reader {
 
   protected final Cell splitCell;
 
-  private byte[] firstKey = null;
+  private Cell firstKey = null;
 
   private boolean firstKeySeeked = false;
 
@@ -262,7 +262,7 @@ public int reseekTo(Cell key) throws IOException {
       @Override
       public boolean seekBefore(Cell key) throws IOException {
         if (top) {
-          Cell fk = new KeyValue.KeyOnlyKeyValue(getFirstKey(), 0, getFirstKey().length);
+          Cell fk = getFirstKey();
           if (getComparator().compareKeyIgnoresMvcc(key, fk) <= 0) {
             return false;
           }
@@ -319,18 +319,18 @@ public byte[] getLastKey() {
   }
 
   @Override
-  public byte[] midkey() throws IOException {
+  public Cell midkey() throws IOException {
     // Returns null to indicate file is not splitable.
     return null;
   }
 
   @Override
-  public byte[] getFirstKey() {
+  public Cell getFirstKey() {
     if (!firstKeySeeked) {
       HFileScanner scanner = getScanner(true, true, false);
       try {
         if (scanner.seekTo()) {
-          this.firstKey = Bytes.toBytes(scanner.getKey());
+          this.firstKey = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(scanner.getKey()));
         }
         firstKeySeeked = true;
       } catch (IOException e) {

diff --git a/...adoop/hbase/util/CompoundBloomFilter.java → ...p/hbase/io/hfile/CompoundBloomFilter.java b/...adoop/hbase/util/CompoundBloomFilter.java → ...p/hbase/io/hfile/CompoundBloomFilter.java
@@ -17,7 +17,7 @@
  * limitations under the License.
  */
 
-package org.apache.hadoop.hbase.util;
+package org.apache.hadoop.hbase.io.hfile;
 
 import java.io.DataInput;
 import java.io.IOException;
@@ -26,15 +26,15 @@
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.util.BloomFilter;
-import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
+import org.apache.hadoop.hbase.util.BloomFilterUtil;
-import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.util.Hash;
-import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
 
 /**
- * A Bloom filter implementation built on top of {@link BloomFilterChunk},
+ * A Bloom filter implementation built on top of 
- * encapsulating a set of fixed-size Bloom filters written out at the time of
+ * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating
+ * a set of fixed-size Bloom filters written out at the time of
  * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
  * block stream, and loaded on demand at query time. This class only provides
  * reading capabilities.
@@ -84,7 +84,11 @@ public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
       throw new IllegalArgumentException("Invalid hash type: " + hashType);
     }
     // We will pass null for ROW block
-    index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
+    if(comparator == null) {
+      index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
+    } else {
+      index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
+    }
     index.readRootIndex(meta, numChunks);
   }
 

diff --git a/...p/hbase/util/CompoundBloomFilterBase.java → ...ase/io/hfile/CompoundBloomFilterBase.java b/...p/hbase/util/CompoundBloomFilterBase.java → ...ase/io/hfile/CompoundBloomFilterBase.java
@@ -17,9 +17,10 @@
  * limitations under the License.
  */
 
-package org.apache.hadoop.hbase.util;
+package org.apache.hadoop.hbase.io.hfile;
 
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.BloomFilterBase;
 
 import org.apache.hadoop.hbase.CellComparator;
 
@@ -47,7 +48,7 @@ public class CompoundBloomFilterBase implements BloomFilterBase {
   protected long totalByteSize;
   protected long totalMaxKeys;
 
-  /** Hash function type to use, as defined in {@link Hash} */
+  /** Hash function type to use, as defined in {@link org.apache.hadoop.hbase.util.Hash} */
   protected int hashType;
   /** Comparator used to compare Bloom filter keys */
   protected CellComparator comparator;

diff --git a/...hbase/util/CompoundBloomFilterWriter.java → ...e/io/hfile/CompoundBloomFilterWriter.java b/...hbase/util/CompoundBloomFilterWriter.java → ...e/io/hfile/CompoundBloomFilterWriter.java
@@ -16,7 +16,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hbase.util;
+package org.apache.hadoop.hbase.io.hfile;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -29,9 +29,10 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.util.BloomFilterChunk;
-import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
+import org.apache.hadoop.hbase.util.BloomFilterUtil;
-import org.apache.hadoop.hbase.io.hfile.InlineBlockWriter;
+import org.apache.hadoop.hbase.util.BloomFilterWriter;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.Writable;
 
 /**

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -394,13 +394,13 @@ public interface Reader extends Closeable, CachingBlockReader {
 
     byte[] getLastKey();
 
-    byte[] midkey() throws IOException;
+    Cell midkey() throws IOException;
 
     long length();
 
     long getEntries();
 
-    byte[] getFirstKey();
+    Cell getFirstKey();
 
     long indexSize();