HBASE-16372 References to previous cell in read path should be avoided
(Ram)
Ramkrishna committed Oct 6, 2016
1 parent eb33b60 commit 58e843d
Showing 24 changed files with 697 additions and 47 deletions.
@@ -19,12 +19,17 @@

package org.apache.hadoop.hbase.client;

import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

@Category({ SmallTests.class, ClientTests.class })
public class TestPut {
@Test
public void testCopyConstructor() {
@@ -19,13 +19,17 @@
package org.apache.hadoop.hbase.exceptions;

import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import java.io.IOException;

import static org.junit.Assert.*;

@SuppressWarnings("ThrowableInstanceNeverThrown")
@Category({ SmallTests.class, ClientTests.class })
public class TestClientExceptionsUtil {

@Test
@@ -97,7 +97,7 @@ public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue>
}


/**************** copy key only *********************/
/**************** copy the cell to create a new keyvalue *********************/

public static KeyValue copyToNewKeyValue(final Cell cell) {
byte[] bytes = copyToNewByteArray(cell);
@@ -118,6 +118,21 @@ public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
return buffer;
}

/**
* Copies the key to a new KeyValue
* @param cell the cell from which the key is copied
* @return a KeyValue that consists of only the key part of the incoming cell
*/
public static KeyValue toNewKeyCell(final Cell cell) {
byte[] bytes = new byte[keyLength(cell)];
appendKeyTo(cell, bytes, 0);
KeyValue kv = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
// Set the seq id. The new key cell could be used in comparisons, so it
// is important that it carries the seqid too. Otherwise the comparison would fail
kv.setSequenceId(cell.getSequenceId());
return kv;
}

public static byte[] copyToNewByteArray(final Cell cell) {
int v1Length = length(cell);
byte[] backingBytes = new byte[v1Length];
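A minimal usage sketch of the new helper (illustrative only, not part of the patch; the class name, literals and seqid are made up): copy just the key portion of a cell into a standalone KeyValue that no longer references the original backing bytes, while keeping the sequence id so mvcc-aware comparisons still agree with the original cell.

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.util.Bytes;

public class ToNewKeyCellExample {
  public static void main(String[] args) {
    KeyValue original = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
        Bytes.toBytes("q"), 1L, Bytes.toBytes("value"));
    original.setSequenceId(42L);
    // The copy is backed by its own byte[] and carries only the key part plus the seqid,
    // so it is safe to hold on to after the source block is released.
    Cell keyOnlyCopy = KeyValueUtil.toNewKeyCell(original);
    System.out.println(keyOnlyCopy.getSequenceId() == original.getSequenceId()); // true
  }
}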
@@ -1,5 +1,4 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -29,6 +28,7 @@
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.BloomFilterChunk;
@@ -60,6 +60,8 @@ public class CompoundBloomFilterWriter extends CompoundBloomFilterBase

/** The size of individual Bloom filter chunks to create */
private int chunkByteSize;
/** The prev Cell that was processed */
private Cell prevCell;

/** A Bloom filter chunk enqueued for writing */
private static class ReadyChunk {
@@ -159,7 +161,7 @@ private void enqueueReadyChunk(boolean closing) {
}

@Override
public void add(Cell cell) {
public void append(Cell cell) throws IOException {
if (cell == null)
throw new NullPointerException();

@@ -181,9 +183,22 @@ public void add(Cell cell) {
}

chunk.add(cell);
this.prevCell = cell;
++totalKeyCount;
}

@Override
public void beforeShipped() throws IOException {
if (this.prevCell != null) {
this.prevCell = KeyValueUtil.toNewKeyCell(this.prevCell);
}
}

@Override
public Cell getPrevCell() {
return this.prevCell;
}

private void allocateNewChunk() {
if (prevChunk == null) {
// First chunk
@@ -54,6 +54,8 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.ShipperListener;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.UnsafeByteOperations;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
@@ -194,15 +196,13 @@ public static final long getChecksumFailuresCount() {
}

/** API required to write an {@link HFile} */
public interface Writer extends Closeable {
public interface Writer extends Closeable, CellSink, ShipperListener {
/** Max memstore (mvcc) timestamp in FileInfo */
public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

/** Add an element to the file info map. */
void appendFileInfo(byte[] key, byte[] value) throws IOException;

void append(Cell cell) throws IOException;

/** @return the path to this {@link HFile} */
Path getPath();

@@ -36,6 +36,7 @@
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.CellComparator.MetaCellComparator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.compress.Compression;
@@ -701,6 +702,20 @@ public void append(final Cell cell) throws IOException {
}
}

@Override
public void beforeShipped() throws IOException {
// Clone the cells this writer still references so they no longer point into the blocks being shipped
if (this.lastCell != null) {
this.lastCell = KeyValueUtil.toNewKeyCell(this.lastCell);
}
if (this.firstCellInBlock != null) {
this.firstCellInBlock = KeyValueUtil.toNewKeyCell(this.firstCellInBlock);
}
if (this.lastCellOfPreviousBlock != null) {
this.lastCellOfPreviousBlock = KeyValueUtil.toNewKeyCell(this.lastCellOfPreviousBlock);
}
}

protected void finishFileInfo() throws IOException {
if (lastCell != null) {
// Make a copy. The copy is stuffed into our fileinfo map. Needs a clean
@@ -35,6 +35,7 @@
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.HMobStore;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
@@ -26,13 +26,13 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor.CellSink;
import org.apache.hadoop.hbase.regionserver.CellSink;

/**
* Base class for cell sink that separates the provided cells into multiple files.
*/
@InterfaceAudience.Private
public abstract class AbstractMultiFileWriter implements CellSink {
public abstract class AbstractMultiFileWriter implements CellSink, ShipperListener {

private static final Log LOG = LogFactory.getLog(AbstractMultiFileWriter.class);

@@ -116,4 +116,13 @@ protected void preCommitWriters() throws IOException {
*/
protected void preCloseWriter(StoreFileWriter writer) throws IOException {
}

@Override
public void beforeShipped() throws IOException {
if (this.writers() != null) {
for (StoreFileWriter writer : writers()) {
writer.beforeShipped();
}
}
}
}
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.BloomFilterWriter;

/**
* A sink of cells that allows appending cells to the Writers that implement it.
* {@link org.apache.hadoop.hbase.io.hfile.HFile.Writer},
* {@link StoreFileWriter}, {@link AbstractMultiFileWriter},
* {@link BloomFilterWriter} are some implementors of this.
*/
@InterfaceAudience.Private
public interface CellSink {
/**
* Append the given cell
* @param cell the cell to be added
* @throws IOException
*/
void append(Cell cell) throws IOException;
}
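For illustration only (the helper below is hypothetical, not part of the patch), consuming a CellSink is just a loop of append() calls; the reworked performFlush() at the end of this diff is declared against exactly this interface.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.regionserver.CellSink;

public final class CellSinkExample {
  // Push every cell of a batch into whichever writer implements CellSink.
  static void drainTo(List<Cell> cells, CellSink sink) throws IOException {
    for (Cell cell : cells) {
      sink.append(cell);
    }
  }
}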
@@ -971,6 +971,8 @@ public StoreFileWriter createWriterInTmp(long maxKeyCount, Compression.Algorithm
* @param includesTag - includesTag or not
* @return Writer for a new StoreFile in the tmp dir.
*/
// TODO : allow the Writer factory to create Writers of ShipperListener type only in case of
// compaction
@Override
public StoreFileWriter createWriterInTmp(long maxKeyCount, Compression.Algorithm compression,
boolean isCompaction, boolean includeMVCCReadpoint, boolean includesTag,
@@ -0,0 +1,36 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;

import org.apache.hadoop.hbase.classification.InterfaceAudience;

/**
* Implementors of this interface need to perform some action when
* {@link Shipper#shipped()} is called
*/
@InterfaceAudience.Private
public interface ShipperListener {

/**
* The action that needs to be performed before {@link Shipper#shipped()} is called
* @throws IOException
*/
void beforeShipped() throws IOException;
}
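A rough sketch of the pattern this patch wires through HFile.Writer, StoreFileWriter, AbstractMultiFileWriter and the bloom filter writers (the class below is illustrative, not part of the patch): a writer implementing both CellSink and ShipperListener remembers the last cell it appended, and beforeShipped() replaces that reference with a freshly copied key-only cell so nothing keeps pointing into block buffers the scanner is about to release.

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.ShipperListener;

public class ExampleCellWriter implements CellSink, ShipperListener {
  private Cell lastCell;

  @Override
  public void append(Cell cell) throws IOException {
    // ... write the cell to the underlying file ...
    this.lastCell = cell; // may still reference a shared block buffer
  }

  @Override
  public void beforeShipped() throws IOException {
    if (this.lastCell != null) {
      // Copy into a standalone, key-only cell before Shipper#shipped() releases the blocks.
      this.lastCell = KeyValueUtil.toNewKeyCell(this.lastCell);
    }
  }
}

A caller on the read path would then be expected to invoke beforeShipped() on its writer just before calling Shipper#shipped(), mirroring how StoreFileWriter below forwards the call to its HFile writer and bloom filter writers.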
@@ -35,7 +35,6 @@
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.util.BloomContext;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
@@ -51,7 +50,7 @@
* local because it is an implementation detail of the HBase regionserver.
*/
@InterfaceAudience.Private
public class StoreFileWriter implements Compactor.CellSink {
public class StoreFileWriter implements CellSink, ShipperListener {
private static final Log LOG = LogFactory.getLog(StoreFileWriter.class.getName());

private final BloomFilterWriter generalBloomFilterWriter;
@@ -120,6 +119,7 @@ private StoreFileWriter(FileSystem fs, Path path,
// it no longer writable.
this.timeRangeTrackerSet = trt != null;
this.timeRangeTracker = this.timeRangeTrackerSet? trt: new TimeRangeTracker();
// TODO : Change all writers to be specifically created for compaction context
writer = HFile.getWriterFactory(conf, cacheConf)
.withPath(fs, path)
.withComparator(comparator)
@@ -140,10 +140,10 @@ private StoreFileWriter(FileSystem fs, Path path,
// init bloom context
switch (bloomType) {
case ROW:
bloomContext = new RowBloomContext(generalBloomFilterWriter);
bloomContext = new RowBloomContext(generalBloomFilterWriter, comparator);
break;
case ROWCOL:
bloomContext = new RowColBloomContext(generalBloomFilterWriter);
bloomContext = new RowColBloomContext(generalBloomFilterWriter, comparator);
break;
default:
throw new IOException(
@@ -160,7 +160,7 @@ private StoreFileWriter(FileSystem fs, Path path,
this.deleteFamilyBloomFilterWriter = BloomFilterFactory
.createDeleteBloomAtWrite(conf, cacheConf,
(int) Math.min(maxKeys, Integer.MAX_VALUE), writer);
deleteFamilyBloomContext = new RowBloomContext(deleteFamilyBloomFilterWriter);
deleteFamilyBloomContext = new RowBloomContext(deleteFamilyBloomFilterWriter, comparator);
} else {
deleteFamilyBloomFilterWriter = null;
}
@@ -251,13 +251,27 @@ private void appendDeleteFamilyBloomFilter(final Cell cell)
}
}

@Override
public void append(final Cell cell) throws IOException {
appendGeneralBloomfilter(cell);
appendDeleteFamilyBloomFilter(cell);
writer.append(cell);
trackTimestamps(cell);
}

@Override
public void beforeShipped() throws IOException {
// For now these writers will always be of ShipperListener type.
// TODO : Change all writers to be specifically created for compaction context
writer.beforeShipped();
if (generalBloomFilterWriter != null) {
generalBloomFilterWriter.beforeShipped();
}
if (deleteFamilyBloomFilterWriter != null) {
deleteFamilyBloomFilterWriter.beforeShipped();
}
}

public Path getPath() {
return this.writer.getPath();
}
@@ -32,7 +32,6 @@
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputControlUtil;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;

Expand Down Expand Up @@ -111,7 +110,7 @@ protected InternalScanner createScanner(KeyValueScanner snapshotScanner,
* @param smallestReadPoint Smallest read point used for the flush.
* @param throughputController A controller to avoid flush too fast
*/
protected void performFlush(InternalScanner scanner, Compactor.CellSink sink,
protected void performFlush(InternalScanner scanner, CellSink sink,
long smallestReadPoint, ThroughputController throughputController) throws IOException {
int compactionKVMax =
conf.getInt(HConstants.COMPACTION_KV_MAX, HConstants.COMPACTION_KV_MAX_DEFAULT);
