HBASE-26567 Remove IndexType from ChunkCreator (#3947)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
comnetwork committed Jan 4, 2022
1 parent 39a8e3b commit 70cb9b0
Showing 5 changed files with 114 additions and 88 deletions.
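At a glance, the commit removes the CompactingMemStore.IndexType parameter from every ChunkCreator.getChunk overload, so callers now pick a chunk only by ChunkType (or take the default data chunk). Below is a minimal sketch of the resulting call-site shape; it assumes code living in org.apache.hadoop.hbase.regionserver (the methods are package-private) and that ChunkType resolves there, so treat it as illustrative rather than a drop-in snippet.

package org.apache.hadoop.hbase.regionserver;

// Sketch of the simplified allocation API after HBASE-26567.
final class ChunkCreatorApiSketch {
  static void allocateExamples() {
    ChunkCreator creator = ChunkCreator.getInstance();

    // Before this commit callers also had to pass an index type, e.g.
    //   creator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.DATA_CHUNK);

    // Now the chunk type alone selects the backing pool and the chunk size.
    Chunk data = creator.getChunk();                        // default data chunk
    Chunk index = creator.getChunk(ChunkType.INDEX_CHUNK);  // sized from the index chunk pool
  }
}

The hunks that follow are from ChunkCreator.java.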
@@ -28,6 +28,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;

import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.HeapMemoryTuneObserver;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.StringUtils;
@@ -134,51 +135,36 @@ public static ChunkCreator getInstance() {
return instance;
}

/**
* Creates and inits a chunk. The default implementation for a specific chunk size.
* @return the chunk that was initialized
*/
Chunk getChunk(ChunkType chunkType) {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, chunkType);
}

/**
* Creates and inits a chunk. The default implementation.
* Creates and inits a data chunk. The default implementation.
* @return the chunk that was initialized
*/
Chunk getChunk() {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, ChunkType.DATA_CHUNK);
return getChunk(ChunkType.DATA_CHUNK);
}

/**
* Creates and inits a chunk. The default implementation for a specific index type.
* Creates and inits a chunk with specific type.
* @return the chunk that was initialized
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
return getChunk(chunkIndexType, ChunkType.DATA_CHUNK);
}

/**
* Creates and inits a chunk with specific index type and type.
* @return the chunk that was initialized
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType) {
Chunk getChunk(ChunkType chunkType) {
switch (chunkType) {
case INDEX_CHUNK:
if (indexChunksPool == null) {
if (indexChunkSize <= 0) {
throw new IllegalArgumentException(
"chunkType is INDEX_CHUNK but indexChunkSize is:[" + this.indexChunkSize + "]");
}
return getChunk(chunkIndexType, chunkType, indexChunkSize);
return getChunk(chunkType, indexChunkSize);
} else {
return getChunk(chunkIndexType, chunkType, indexChunksPool.getChunkSize());
return getChunk(chunkType, indexChunksPool.getChunkSize());
}
case DATA_CHUNK:
if (dataChunksPool == null) {
return getChunk(chunkIndexType, chunkType, chunkSize);
return getChunk(chunkType, chunkSize);
} else {
return getChunk(chunkIndexType, chunkType, dataChunksPool.getChunkSize());
return getChunk(chunkType, dataChunksPool.getChunkSize());
}
default:
throw new IllegalArgumentException(
@@ -189,10 +175,9 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType)
/**
* Creates and inits a chunk.
* @return the chunk that was initialized
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
* @param size the size of the chunk to be allocated, in bytes
*/
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType, int size) {
Chunk getChunk(ChunkType chunkType, int size) {
Chunk chunk = null;
MemStoreChunkPool pool = null;

@@ -217,9 +202,7 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType,
}

if (chunk == null) {
// the second parameter explains whether CellChunkMap index is requested,
// in that case, put allocated on demand chunk mapping into chunkIdMap
chunk = createChunk(false, chunkIndexType, chunkType, size);
chunk = createChunk(false, chunkType, size);
}

// now we need to actually do the expensive memory allocation step in case of a new chunk,
@@ -240,22 +223,21 @@ Chunk getJumboChunk(int jumboSize) {
if (allocSize <= this.getChunkSize(ChunkType.DATA_CHUNK)) {
LOG.warn("Jumbo chunk size " + jumboSize + " must be more than regular chunk size "
+ this.getChunkSize(ChunkType.DATA_CHUNK) + ". Converting to regular chunk.");
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
return getChunk();
}
// the new chunk is going to hold the jumbo cell data and needs to be referenced by
// a strong map. Therefore the CCM index type
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK, allocSize);
// a strong map.
return getChunk(ChunkType.JUMBO_CHUNK, allocSize);
}

/**
* Creates the chunk either onheap or offheap
* @param pool indicates if the chunks have to be created which will be used by the Pool
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
* @param chunkType whether the requested chunk is data chunk or index chunk.
* @param size the size of the chunk to be allocated, in bytes
* @return the chunk
*/
private Chunk createChunk(boolean pool, CompactingMemStore.IndexType chunkIndexType,
ChunkType chunkType, int size) {
private Chunk createChunk(boolean pool, ChunkType chunkType, int size) {
Chunk chunk = null;
int id = chunkID.getAndIncrement();
assert id > 0;
@@ -265,22 +247,39 @@ private Chunk createChunk(boolean pool, CompactingMemStore.IndexType chunkIndexT
} else {
chunk = new OnheapChunk(size, id, chunkType, pool);
}
if (pool || (chunkIndexType == CompactingMemStore.IndexType.CHUNK_MAP)) {
// put the pool chunk into the chunkIdMap so it is not GC-ed
this.chunkIdMap.put(chunk.getId(), chunk);
}

/**
* Here we always put the chunk into the {@link ChunkCreator#chunkIdMap} no matter whether the
* chunk is pooled or not. <br/>
* For {@link CompactingMemStore},because the chunk could only be acquired from
* {@link ChunkCreator} through {@link MemStoreLABImpl}, and
* {@link CompactingMemStore#indexType} could only be {@link IndexType.CHUNK_MAP} when using
* {@link MemStoreLABImpl}, so we must put chunk into this {@link ChunkCreator#chunkIdMap} to
* make sure the chunk could be got by chunkId.
* <p>
* For {@link DefaultMemStore},it is also reasonable to put the chunk in
* {@link ChunkCreator#chunkIdMap} because: <br/>
* 1.When the {@link MemStoreLAB} which created the chunk is not closed, this chunk is used by
* the {@link Segment} which references this {@link MemStoreLAB}, so this chunk certainly should
* not be GC-ed, putting the chunk in {@link ChunkCreator#chunkIdMap} does not prevent useless
* chunk to be GC-ed. <br/>
* 2.When the {@link MemStoreLAB} which created the chunk is closed, and if the chunk is not
* pooled, {@link ChunkCreator#removeChunk} is invoked to remove the chunk from this
* {@link ChunkCreator#chunkIdMap}, so there is no memory leak.
*/
this.chunkIdMap.put(chunk.getId(), chunk);

return chunk;
}

// Chunks from pool are created covered with strong references anyway
// TODO: change to CHUNK_MAP if it is generally defined
private Chunk createChunkForPool(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType,
// Chunks from pool are created covered with strong references anyway.
private Chunk createChunkForPool(ChunkType chunkType,
int chunkSize) {
if (chunkSize != dataChunksPool.getChunkSize() &&
chunkSize != indexChunksPool.getChunkSize()) {
return null;
}
return createChunk(true, chunkIndexType, chunkType, chunkSize);
return createChunk(true, chunkType, chunkSize);
}

// Used to translate the ChunkID into a chunk ref
@@ -346,7 +345,7 @@ private class MemStoreChunkPool implements HeapMemoryTuneObserver {
this.reclaimedChunks = new LinkedBlockingQueue<>();
for (int i = 0; i < initialCount; i++) {
Chunk chunk =
createChunk(true, CompactingMemStore.IndexType.ARRAY_MAP, chunkType, chunkSize);
createChunk(true, chunkType, chunkSize);
chunk.init();
reclaimedChunks.add(chunk);
}
@@ -368,10 +367,6 @@ private class MemStoreChunkPool implements HeapMemoryTuneObserver {
* @see #putbackChunks(Chunk)
*/
Chunk getChunk() {
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP);
}

Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
Chunk chunk = reclaimedChunks.poll();
if (chunk != null) {
chunk.reset();
@@ -382,7 +377,7 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
long created = this.chunkCount.get();
if (created < this.maxCount) {
if (this.chunkCount.compareAndSet(created, created + 1)) {
chunk = createChunkForPool(chunkIndexType, chunkType, chunkSize);
chunk = createChunkForPool(chunkType, chunkSize);
break;
}
} else {
@@ -559,7 +554,7 @@ int getPoolSize(ChunkType chunkType) {

boolean isChunkInPool(int chunkId) {
Chunk c = getChunk(chunkId);
if (c==null) {
if (c == null) {
return false;
}

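The largest addition above is the new Javadoc on createChunk, which explains why every chunk is now registered in chunkIdMap regardless of index type: CellChunkMap-based indexes resolve cells by chunk id, and non-pooled chunks are removed from the map again when their MemStoreLAB closes, so nothing leaks. A minimal standalone sketch of that bookkeeping, using hypothetical names (the real logic is split across ChunkCreator, MemStoreLABImpl and the chunk pools):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical, simplified model of the chunkIdMap lifecycle described in the Javadoc above.
final class ChunkIdMapSketch {
  static final class Chunk {
    final int id;
    final boolean fromPool;
    Chunk(int id, boolean fromPool) { this.id = id; this.fromPool = fromPool; }
  }

  private final Map<Integer, Chunk> chunkIdMap = new ConcurrentHashMap<>();
  private final AtomicInteger chunkID = new AtomicInteger(1);

  Chunk createChunk(boolean fromPool) {
    Chunk chunk = new Chunk(chunkID.getAndIncrement(), fromPool);
    // Always registered, so an index that stores only chunk ids can later resolve
    // a cell back to the chunk holding its data.
    chunkIdMap.put(chunk.id, chunk);
    return chunk;
  }

  // Called when the owning MemStoreLAB closes: non-pooled chunks are dropped so they
  // can be GC-ed, while pooled chunks stay mapped and go back to their pool.
  void onLabClosed(Chunk chunk) {
    if (!chunk.fromPool) {
      chunkIdMap.remove(chunk.id);
    }
  }
}

The next file in the diff is MemStoreLABImpl.java, which previously threaded an IndexType through to ChunkCreator.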
@@ -33,6 +33,7 @@
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.nio.RefCnt;
import org.apache.hadoop.hbase.regionserver.CompactingMemStore.IndexType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -42,27 +43,28 @@
/**
* A memstore-local allocation buffer.
* <p>
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates
* big (2MB) byte[] chunks from and then doles it out to threads that request
* slices into the array.
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates big (2MB) byte[] chunks
* from and then doles it out to threads that request slices into the array.
* <p>
* The purpose of this class is to combat heap fragmentation in the
* regionserver. By ensuring that all Cells in a given memstore refer
* only to large chunks of contiguous memory, we ensure that large blocks
* get freed up when the memstore is flushed.
* The purpose of this class is to combat heap fragmentation in the regionserver. By ensuring that
* all Cells in a given memstore refer only to large chunks of contiguous memory, we ensure that
* large blocks get freed up when the memstore is flushed.
* <p>
* Without the MSLAB, the byte array allocated during insertion end up
* interleaved throughout the heap, and the old generation gets progressively
* more fragmented until a stop-the-world compacting collection occurs.
* Without the MSLAB, the byte array allocated during insertion end up interleaved throughout the
* heap, and the old generation gets progressively more fragmented until a stop-the-world compacting
* collection occurs.
* <p>
* TODO: we should probably benchmark whether word-aligning the allocations
* would provide a performance improvement - probably would speed up the
* Bytes.toLong/Bytes.toInt calls in KeyValue, but some of those are cached
* anyway.
* The chunks created by this MemStoreLAB can get pooled at {@link ChunkCreator}.
* When the Chunk comes from pool, it can be either an on heap or an off heap backed chunk. The chunks,
* which this MemStoreLAB creates on its own (when no chunk available from pool), those will be
* always on heap backed.
* TODO: we should probably benchmark whether word-aligning the allocations would provide a
* performance improvement - probably would speed up the Bytes.toLong/Bytes.toInt calls in KeyValue,
* but some of those are cached anyway. The chunks created by this MemStoreLAB can get pooled at
* {@link ChunkCreator}. When the Chunk comes from pool, it can be either an on heap or an off heap
* backed chunk. The chunks, which this MemStoreLAB creates on its own (when no chunk available from
* pool), those will be always on heap backed.
* <p>
* NOTE:if user requested to work with MSLABs (whether on- or off-heap), in
* {@link CompactingMemStore} ctor, the {@link CompactingMemStore#indexType} could only be
* {@link IndexType#CHUNK_MAP},that is to say the immutable segments using MSLABs are going to use
* {@link CellChunkMap} as their index.
*/
@InterfaceAudience.Private
public class MemStoreLABImpl implements MemStoreLAB {
@@ -78,7 +80,6 @@ public class MemStoreLABImpl implements MemStoreLAB {
private final int dataChunkSize;
private final int maxAlloc;
private final ChunkCreator chunkCreator;
private final CompactingMemStore.IndexType idxType; // what index is used for corresponding segment

// This flag is for closing this instance, its set when clearing snapshot of
// memstore
@@ -104,13 +105,11 @@ public MemStoreLABImpl(Configuration conf) {
// if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one!
Preconditions.checkArgument(maxAlloc <= dataChunkSize,
MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY);

this.refCnt = RefCnt.create(() -> {
recycleChunks();
});

// if user requested to work with MSLABs (whether on- or off-heap), then the
// immutable segments are going to use CellChunkMap as their index
idxType = CompactingMemStore.IndexType.CHUNK_MAP;
}

@Override
@@ -339,7 +338,7 @@ private Chunk getOrMakeChunk() {
if (c != null) {
return c;
}
c = this.chunkCreator.getChunk(idxType);
c = this.chunkCreator.getChunk();
if (c != null) {
// set the curChunk. No need of CAS as only one thread will be here
currChunk.set(c);
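With the per-LAB index type gone, MemStoreLABImpl drops its idxType field, and the class Javadoc now records that MSLAB-backed immutable segments always use the CellChunkMap (CHUNK_MAP) index. Allocation reduces to a plain getChunk() call, roughly as sketched below; this is a simplified reading of the getOrMakeChunk() hunk above (the real method also guards allocation more carefully and records the chunk id in the LAB's chunk list), not a drop-in replacement.

import java.util.concurrent.atomic.AtomicReference;

// Simplified sketch of MemStoreLABImpl.getOrMakeChunk() after this commit.
final class GetOrMakeChunkSketch {
  private final AtomicReference<Chunk> currChunk = new AtomicReference<>();
  private final ChunkCreator chunkCreator = ChunkCreator.getInstance();

  Chunk getOrMakeChunk() {
    Chunk c = currChunk.get();
    if (c != null) {
      return c;                  // fast path: keep filling the current chunk
    }
    c = chunkCreator.getChunk(); // no IndexType argument anymore
    if (c != null) {
      currChunk.set(c);          // only one thread gets here, so no CAS is needed
    }
    return c;
  }
}

The remaining hunks shown are from the CellChunkMap test code (setUpCellChunkMap and setUpJumboCellChunkMap), where the same simplification applies.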
@@ -282,8 +282,8 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {

// allocate new chunks and use the data chunk to hold the full data of the cells
// and the index chunk to hold the cell-representations
Chunk dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk dataChunk = chunkCreator.getChunk();
Chunk idxChunk = chunkCreator.getChunk();
// the array of index chunks to be used as a basis for CellChunkMap
Chunk chunkArray[] = new Chunk[8]; // according to test currently written 8 is way enough
int chunkArrayIdx = 0;
@@ -300,7 +300,7 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {
// do we have enough space to write the cell data on the data chunk?
if (dataOffset + kv.getSerializedSize() > chunkCreator.getChunkSize()) {
// allocate more data chunks if needed
dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
dataChunk = chunkCreator.getChunk();
dataBuffer = dataChunk.getData();
dataOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
}
@@ -310,7 +310,7 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {
// do we have enough space to write the cell-representation on the index chunk?
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
// allocate more index chunks if needed
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
idxChunk = chunkCreator.getChunk();
idxBuffer = idxChunk.getData();
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
chunkArray[chunkArrayIdx++] = idxChunk;
@@ -331,10 +331,10 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
// allocate new chunks and use the data JUMBO chunk to hold the full data of the cells
// and the normal index chunk to hold the cell-representations
Chunk dataJumboChunk =
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
smallChunkSize);
assertTrue(dataJumboChunk.isJumbo());
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
Chunk idxChunk = chunkCreator.getChunk();
// the array of index chunks to be used as a basis for CellChunkMap
Chunk[] chunkArray = new Chunk[8]; // according to test currently written 8 is way enough
int chunkArrayIdx = 0;
@@ -354,7 +354,7 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
// do we have enough space to write the cell-representation on the index chunk?
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
// allocate more index chunks if needed
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
idxChunk = chunkCreator.getChunk();
idxBuffer = idxChunk.getData();
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
chunkArray[chunkArrayIdx++] = idxChunk;
@@ -368,7 +368,7 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
// Jumbo chunks are working only with one cell per chunk, thus always allocate a new jumbo
// data chunk for next cell
dataJumboChunk =
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
smallChunkSize);
assertTrue(dataJumboChunk.isJumbo());
dataBuffer = dataJumboChunk.getData();
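In the jumbo-chunk test above, the explicit size argument stays while the IndexType argument disappears. A hedged sketch of that path, with a hypothetical size and the same package-visibility caveat as the earlier sketches:

// Jumbo chunks hold exactly one oversized cell.
final class JumboChunkSketch {
  static Chunk allocateJumbo() {
    ChunkCreator creator = ChunkCreator.getInstance();
    // Hypothetical size, chosen to be larger than the regular data chunk; per the
    // warning in the ChunkCreator hunk, a smaller request is converted to a regular chunk.
    Chunk jumbo = creator.getJumboChunk(3 * 1024 * 1024);
    assert jumbo.isJumbo();
    // Cell data is written after the chunk header, as the test does with
    // ChunkCreator.SIZEOF_CHUNK_HEADER.
    return jumbo;
  }
}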
