Skip to content
Permalink
Browse files
HBASE-26878 TableInputFormatBase should cache RegionSizeCalculator (#4271)

Signed-off-by: Andrew Purtell <apurtell@apache.org>
  • Loading branch information
bbeaudreault committed Mar 24, 2022
1 parent ec71417 commit 2a3ac5b33c50d30e6ab83940861f629dd98443dd
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
@@ -139,6 +139,8 @@
private TableRecordReader tableRecordReader = null;
/** The underlying {@link Connection} of the table. */
private Connection connection;
/** Used to generate splits based on region size. */
private RegionSizeCalculator regionSizeCalculator;


/** The reverse DNS lookup cache mapping: IPAddress => HostName */
@@ -288,8 +290,11 @@ public List<InputSplit> getSplits(JobContext context) throws IOException {
* @throws IOException throws IOException
*/
private List<InputSplit> oneInputSplitPerRegion() throws IOException {
RegionSizeCalculator sizeCalculator =
createRegionSizeCalculator(getRegionLocator(), getAdmin());
if (regionSizeCalculator == null) {
// Initialize here rather than with the other resources because this involves
// a full scan of meta, which can be heavy. We might as well only do it if/when necessary.
regionSizeCalculator = createRegionSizeCalculator(getRegionLocator(), getAdmin());
}

TableName tableName = getTable().getName();

@@ -302,7 +307,7 @@ private List<InputSplit> oneInputSplitPerRegion() throws IOException {
throw new IOException("Expecting at least one region.");
}
List<InputSplit> splits = new ArrayList<>(1);
long regionSize = sizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
long regionSize = regionSizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
// In the table input format for single table we do not need to
// store the scan object in table split because it can be memory intensive and redundant
// information to what is already stored in conf SCAN. See HBASE-25212
@@ -345,7 +350,7 @@ private List<InputSplit> oneInputSplitPerRegion() throws IOException {

byte[] regionName = location.getRegion().getRegionName();
String encodedRegionName = location.getRegion().getEncodedName();
long regionSize = sizeCalculator.getRegionSize(regionName);
long regionSize = regionSizeCalculator.getRegionSize(regionName);
// In the table input format for single table we do not need to
// store the scan object in table split because it can be memory intensive and redundant
// information to what is already stored in conf SCAN. See HBASE-25212
@@ -597,6 +602,7 @@ protected void initializeTable(Connection connection, TableName tableName) throw
this.regionLocator = connection.getRegionLocator(tableName);
this.admin = connection.getAdmin();
this.connection = connection;
this.regionSizeCalculator = null;
}

@InterfaceAudience.Private
@@ -664,6 +670,7 @@ protected void closeTable() throws IOException {
table = null;
regionLocator = null;
connection = null;
regionSizeCalculator = null;
}

private void close(Closeable... closables) throws IOException {
@@ -21,6 +21,7 @@
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.IOException;
@@ -56,6 +57,7 @@
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

@@ -66,6 +68,34 @@ public class TestTableInputFormatBase {
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestTableInputFormatBase.class);

/**
 * Checks that repeated {@code getSplits} calls on an already-initialized input format share a
 * single RegionSizeCalculator, and that a fresh one is created only after the format is
 * initialized again.
 */
@Test
public void testReuseRegionSizeCalculator() throws IOException {
  // Job configuration: test connection implementation, a table name, and auto-balance enabled.
  Configuration jobConf = HBaseConfiguration.create();
  jobConf.set(ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL,
    ConnectionForMergeTesting.class.getName());
  jobConf.set(TableInputFormat.INPUT_TABLE, "testTable");
  jobConf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);

  JobContext jobContext = mock(JobContext.class);
  when(jobContext.getConfiguration()).thenReturn(jobConf);

  // Spy on the format so calls to createRegionSizeCalculator can be counted afterwards.
  TableInputFormat inputFormat = Mockito.spy(new TableInputFormatForMergeTesting());
  inputFormat.setConf(jobConf);

  // Two rounds, each: one explicit initialize followed by two getSplits calls.
  // Initializing explicitly means the table is already set when getSplits runs; otherwise
  // every getSplits call would re-initialize everything and nothing could be reused.
  // The second round's initialize is expected to discard the calculator cached by the first
  // round, so the next getSplits call has to create a new one.
  final int initializeRounds = 2;
  for (int round = 0; round < initializeRounds; round++) {
    inputFormat.initialize(jobContext);
    inputFormat.getSplits(jobContext);
    inputFormat.getSplits(jobContext);
  }

  // One calculator per initialize round: 2 creations in total despite 4 getSplits calls.
  verify(inputFormat, Mockito.times(initializeRounds))
    .createRegionSizeCalculator(any(), any());
}

@Test
public void testTableInputFormatBaseReverseDNSForIPv6()
throws UnknownHostException {

0 comments on commit 2a3ac5b

Please sign in to comment.