HBASE-16372 References to previous cell in read path should be avoided
(Ram)
Ramkrishna committed Oct 6, 2016
1 parent eb33b60 commit 58e843d
Showing 24 changed files with 697 additions and 47 deletions.
@@ -19,12 +19,17 @@

package org.apache.hadoop.hbase.client;

import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

@Category({ SmallTests.class, ClientTests.class })
public class TestPut {
@Test
public void testCopyConstructor() {
@@ -19,13 +19,17 @@
package org.apache.hadoop.hbase.exceptions;

import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import java.io.IOException;

import static org.junit.Assert.*;

@SuppressWarnings("ThrowableInstanceNeverThrown")
@Category({ SmallTests.class, ClientTests.class })
public class TestClientExceptionsUtil {

@Test
@@ -97,7 +97,7 @@ public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue>
}


/**************** copy key only *********************/
/**************** copy the cell to create a new keyvalue *********************/

public static KeyValue copyToNewKeyValue(final Cell cell) {
byte[] bytes = copyToNewByteArray(cell);
@@ -118,6 +118,21 @@ public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
return buffer;
}

/**
* Copies the key to a new KeyValue
* @param cell the cell from which the key is copied
* @return a KeyValue that consists of only the key part of the incoming cell
*/
public static KeyValue toNewKeyCell(final Cell cell) {
byte[] bytes = new byte[keyLength(cell)];
appendKeyTo(cell, bytes, 0);
KeyValue kv = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
// Set the seq id. The new key cell could be used in comparisons, so it
// is important that it carries the seqid too. Otherwise the comparison would fail
kv.setSequenceId(cell.getSequenceId());
return kv;
}

public static byte[] copyToNewByteArray(final Cell cell) {
int v1Length = length(cell);
byte[] backingBytes = new byte[v1Length];
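A minimal usage sketch of the new helper (illustrative only, not part of the patch; the class name, literals and seqid are made up): copy just the key portion of a cell into a standalone KeyValue that no longer references the original backing bytes, while keeping the sequence id so mvcc-aware comparisons still agree with the original cell.

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.util.Bytes;

public class ToNewKeyCellExample {
  public static void main(String[] args) {
    KeyValue original = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
        Bytes.toBytes("q"), 1L, Bytes.toBytes("value"));
    original.setSequenceId(42L);
    // The copy is backed by its own byte[] and carries only the key part plus the seqid,
    // so it is safe to hold on to after the source block is released.
    Cell keyOnlyCopy = KeyValueUtil.toNewKeyCell(original);
    System.out.println(keyOnlyCopy.getSequenceId() == original.getSequenceId()); // true
  }
}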
@@ -1,5 +1,4 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -29,6 +28,7 @@
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.BloomFilterChunk;
@@ -60,6 +60,8 @@ public class CompoundBloomFilterWriter extends CompoundBloomFilterBase

/** The size of individual Bloom filter chunks to create */
private int chunkByteSize;
/** The prev Cell that was processed */
private Cell prevCell;

/** A Bloom filter chunk enqueued for writing */
private static class ReadyChunk {
@@ -159,7 +161,7 @@ private void enqueueReadyChunk(boolean closing) {
}

@Override
public void add(Cell cell) {
public void append(Cell cell) throws IOException {
if (cell == null)
throw new NullPointerException();

@@ -181,9 +183,22 @@ public void add(Cell cell) {
}

chunk.add(cell);
this.prevCell = cell;
++totalKeyCount;
}

@Override
public void beforeShipped() throws IOException {
if (this.prevCell != null) {
this.prevCell = KeyValueUtil.toNewKeyCell(this.prevCell);
}
}

@Override
public Cell getPrevCell() {
return this.prevCell;
}

private void allocateNewChunk() {
if (prevChunk == null) {
// First chunk
@@ -54,6 +54,8 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.ShipperListener;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.UnsafeByteOperations;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
@@ -194,15 +196,13 @@ public static final long getChecksumFailuresCount() {
}

/** API required to write an {@link HFile} */
public interface Writer extends Closeable {
public interface Writer extends Closeable, CellSink, ShipperListener {
/** Max memstore (mvcc) timestamp in FileInfo */
public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

/** Add an element to the file info map. */
void appendFileInfo(byte[] key, byte[] value) throws IOException;

void append(Cell cell) throws IOException;

/** @return the path to this {@link HFile} */
Path getPath();

@@ -36,6 +36,7 @@
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.CellComparator.MetaCellComparator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.compress.Compression;
@@ -701,6 +702,20 @@ public void append(final Cell cell) throws IOException {
}
}

@Override
public void beforeShipped() throws IOException {
// Clone the cells this writer still references so they no longer point into the blocks being shipped
if (this.lastCell != null) {
this.lastCell = KeyValueUtil.toNewKeyCell(this.lastCell);
}
if (this.firstCellInBlock != null) {
this.firstCellInBlock = KeyValueUtil.toNewKeyCell(this.firstCellInBlock);
}
if (this.lastCellOfPreviousBlock != null) {
this.lastCellOfPreviousBlock = KeyValueUtil.toNewKeyCell(this.lastCellOfPreviousBlock);
}
}

protected void finishFileInfo() throws IOException {
if (lastCell != null) {
// Make a copy. The copy is stuffed into our fileinfo map. Needs a clean
@@ -35,6 +35,7 @@
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.HMobStore;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
@@ -26,13 +26,13 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor.CellSink;
import org.apache.hadoop.hbase.regionserver.CellSink;

/**
* Base class for cell sink that separates the provided cells into multiple files.
*/
@InterfaceAudience.Private
public abstract class AbstractMultiFileWriter implements CellSink {
public abstract class AbstractMultiFileWriter implements CellSink, ShipperListener {

private static final Log LOG = LogFactory.getLog(AbstractMultiFileWriter.class);

@@ -116,4 +116,13 @@ protected void preCommitWriters() throws IOException {
*/
protected void preCloseWriter(StoreFileWriter writer) throws IOException {
}

@Override
public void beforeShipped() throws IOException {
if (this.writers() != null) {
for (StoreFileWriter writer : writers()) {
writer.beforeShipped();
}
}
}
}
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.BloomFilterWriter;

/**
* A sink of cells that allows appending cells to the Writers that implement it.
* {@link org.apache.hadoop.hbase.io.hfile.HFile.Writer},
* {@link StoreFileWriter}, {@link AbstractMultiFileWriter},
* {@link BloomFilterWriter} are some implementors of this.
*/
@InterfaceAudience.Private
public interface CellSink {
/**
* Append the given cell
* @param cell the cell to be added
* @throws IOException
*/
void append(Cell cell) throws IOException;
}
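For illustration only (the helper below is hypothetical, not part of the patch), consuming a CellSink is just a loop of append() calls; the reworked performFlush() at the end of this diff is declared against exactly this interface.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.regionserver.CellSink;

public final class CellSinkExample {
  // Push every cell of a batch into whichever writer implements CellSink.
  static void drainTo(List<Cell> cells, CellSink sink) throws IOException {
    for (Cell cell : cells) {
      sink.append(cell);
    }
  }
}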
@@ -971,6 +971,8 @@ public StoreFileWriter createWriterInTmp(long maxKeyCount, Compression.Algorithm
* @param includesTag - includesTag or not
* @return Writer for a new StoreFile in the tmp dir.
*/
// TODO : allow the Writer factory to create Writers of ShipperListener type only in case of
// compaction
@Override
public StoreFileWriter createWriterInTmp(long maxKeyCount, Compression.Algorithm compression,
boolean isCompaction, boolean includeMVCCReadpoint, boolean includesTag,
@@ -0,0 +1,36 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;

import org.apache.hadoop.hbase.classification.InterfaceAudience;

/**
* Implementors of this interface need to perform some action when
* {@link Shipper#shipped()} is called
*/
@InterfaceAudience.Private
public interface ShipperListener {

/**
* The action that needs to be performed before {@link Shipper#shipped()} is called
* @throws IOException
*/
void beforeShipped() throws IOException;
}
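A rough sketch of the pattern this patch wires through HFile.Writer, StoreFileWriter, AbstractMultiFileWriter and the bloom filter writers (the class below is illustrative, not part of the patch): a writer implementing both CellSink and ShipperListener remembers the last cell it appended, and beforeShipped() replaces that reference with a freshly copied key-only cell so nothing keeps pointing into block buffers the scanner is about to release.

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.ShipperListener;

public class ExampleCellWriter implements CellSink, ShipperListener {
  private Cell lastCell;

  @Override
  public void append(Cell cell) throws IOException {
    // ... write the cell to the underlying file ...
    this.lastCell = cell; // may still reference a shared block buffer
  }

  @Override
  public void beforeShipped() throws IOException {
    if (this.lastCell != null) {
      // Copy into a standalone, key-only cell before Shipper#shipped() releases the blocks.
      this.lastCell = KeyValueUtil.toNewKeyCell(this.lastCell);
    }
  }
}

A caller on the read path would then be expected to invoke beforeShipped() on its writer just before calling Shipper#shipped(), mirroring how StoreFileWriter below forwards the call to its HFile writer and bloom filter writers.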
@@ -35,7 +35,6 @@
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.util.BloomContext;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
@@ -51,7 +50,7 @@
* local because it is an implementation detail of the HBase regionserver.
*/
@InterfaceAudience.Private
public class StoreFileWriter implements Compactor.CellSink {
public class StoreFileWriter implements CellSink, ShipperListener {
private static final Log LOG = LogFactory.getLog(StoreFileWriter.class.getName());

private final BloomFilterWriter generalBloomFilterWriter;
@@ -120,6 +119,7 @@ private StoreFileWriter(FileSystem fs, Path path,
// it no longer writable.
this.timeRangeTrackerSet = trt != null;
this.timeRangeTracker = this.timeRangeTrackerSet? trt: new TimeRangeTracker();
// TODO : Change all writers to be specifically created for compaction context
writer = HFile.getWriterFactory(conf, cacheConf)
.withPath(fs, path)
.withComparator(comparator)
@@ -140,10 +140,10 @@ private StoreFileWriter(FileSystem fs, Path path,
// init bloom context
switch (bloomType) {
case ROW:
bloomContext = new RowBloomContext(generalBloomFilterWriter);
bloomContext = new RowBloomContext(generalBloomFilterWriter, comparator);
break;
case ROWCOL:
bloomContext = new RowColBloomContext(generalBloomFilterWriter);
bloomContext = new RowColBloomContext(generalBloomFilterWriter, comparator);
break;
default:
throw new IOException(
@@ -160,7 +160,7 @@ private StoreFileWriter(FileSystem fs, Path path,
this.deleteFamilyBloomFilterWriter = BloomFilterFactory
.createDeleteBloomAtWrite(conf, cacheConf,
(int) Math.min(maxKeys, Integer.MAX_VALUE), writer);
deleteFamilyBloomContext = new RowBloomContext(deleteFamilyBloomFilterWriter);
deleteFamilyBloomContext = new RowBloomContext(deleteFamilyBloomFilterWriter, comparator);
} else {
deleteFamilyBloomFilterWriter = null;
}
@@ -251,13 +251,27 @@ private void appendDeleteFamilyBloomFilter(final Cell cell)
}
}

@Override
public void append(final Cell cell) throws IOException {
appendGeneralBloomfilter(cell);
appendDeleteFamilyBloomFilter(cell);
writer.append(cell);
trackTimestamps(cell);
}

@Override
public void beforeShipped() throws IOException {
// For now these writers will always be of ShipperListener type.
// TODO : Change all writers to be specifically created for compaction context
writer.beforeShipped();
if (generalBloomFilterWriter != null) {
generalBloomFilterWriter.beforeShipped();
}
if (deleteFamilyBloomFilterWriter != null) {
deleteFamilyBloomFilterWriter.beforeShipped();
}
}

public Path getPath() {
return this.writer.getPath();
}
@@ -32,7 +32,6 @@
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputControlUtil;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;

Expand Down Expand Up @@ -111,7 +110,7 @@ protected InternalScanner createScanner(KeyValueScanner snapshotScanner,
* @param smallestReadPoint Smallest read point used for the flush.
* @param throughputController A controller to avoid flush too fast
*/
protected void performFlush(InternalScanner scanner, Compactor.CellSink sink,
protected void performFlush(InternalScanner scanner, CellSink sink,
long smallestReadPoint, ThroughputController throughputController) throws IOException {
int compactionKVMax =
conf.getInt(HConstants.COMPACTION_KV_MAX, HConstants.COMPACTION_KV_MAX_DEFAULT);
