HBASE-25922 - Disabled sanity checks ignored on snapshot restore #4533

Merged · 8 commits · Aug 26, 2022
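Summary of the fix: sanityCheck() used to compute a single logWarn flag, honoring both the cluster-wide hbase.table.sanity.checks setting and the per-table override, and wrapped each check in its own try/catch, but the individual check methods always threw. Code paths that call those methods directly, such as region open during a snapshot restore, therefore enforced the checks even when they were disabled. This patch moves the log-or-throw decision into each check method. A minimal sketch of the pattern each method adopts below (warnOrThrowExceptionForFailure is the existing helper referenced at the bottom of TableDescriptorChecker; it is assumed here to log when logWarn is true and to rethrow otherwise):

    boolean logWarn = !shouldSanityCheck(conf); // conf layers table-level values over site config
    try {
      // ... the actual validation (compression, encryption, class loading, etc.)
    } catch (IOException e) {
      // warn and continue when sanity checks are disabled, fail the operation otherwise
      warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
    }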
HRegion.java
@@ -7684,15 +7684,17 @@ public static Region openHRegion(final Region other, final CancelableProgressable
    */
   private HRegion openHRegion(final CancelableProgressable reporter) throws IOException {
     try {
+      CompoundConfiguration cConfig =
+        new CompoundConfiguration().add(conf).addBytesMap(htableDescriptor.getValues());
       // Refuse to open the region if we are missing local compression support
-      TableDescriptorChecker.checkCompression(htableDescriptor);
+      TableDescriptorChecker.checkCompression(cConfig, htableDescriptor);
       // Refuse to open the region if encryption configuration is incorrect or
       // codec support is missing
       LOG.debug("checking encryption for " + this.getRegionInfo().getEncodedName());
-      TableDescriptorChecker.checkEncryption(conf, htableDescriptor);
+      TableDescriptorChecker.checkEncryption(cConfig, htableDescriptor);
       // Refuse to open the region if a required class cannot be loaded
       LOG.debug("checking classloading for " + this.getRegionInfo().getEncodedName());
-      TableDescriptorChecker.checkClassLoading(conf, htableDescriptor);
+      TableDescriptorChecker.checkClassLoading(cConfig, htableDescriptor);
       this.openSeqNum = initialize(reporter);
       this.mvcc.advanceTo(openSeqNum);
       // The openSeqNum must be increased every time when a region is assigned, as we rely on it to
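The cConfig built above layers the table descriptor's values over the server configuration, which is how a per-table hbase.table.sanity.checks setting reaches these checks at region-open time. An illustrative sketch of that precedence (the table and family names are made up; CompoundConfiguration gives later additions precedence):

    Configuration site = HBaseConfiguration.create();
    site.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true); // cluster-wide default

    TableDescriptor td = TableDescriptorBuilder.newBuilder(TableName.valueOf("demo"))
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf"))
      .setValue(TableDescriptorChecker.TABLE_SANITY_CHECKS, "false") // per-table override
      .build();

    Configuration cConfig = new CompoundConfiguration().add(site).addBytesMap(td.getValues());
    // The table-level "false" wins, so shouldSanityCheck(cConfig) would return false here.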
TableDescriptorChecker.java
@@ -60,6 +60,13 @@ public final class TableDescriptorChecker {
   private TableDescriptorChecker() {
   }
 
+  private static boolean shouldSanityCheck(final Configuration conf) {
+    if (conf.getBoolean(TABLE_SANITY_CHECKS, DEFAULT_TABLE_SANITY_CHECKS)) {
+      return true;
+    }
+    return false;
+  }
+
   /**
    * Checks whether the table conforms to some sane limits, and configured values (compression, etc)
    * work. Throws an exception if something is wrong.
@@ -68,15 +75,8 @@ public static void sanityCheck(final Configuration c, final TableDescriptor td)
     throws IOException {
     CompoundConfiguration conf = new CompoundConfiguration().add(c).addBytesMap(td.getValues());
 
-    // Setting this to true logs the warning instead of throwing exception
-    boolean logWarn = false;
-    if (!conf.getBoolean(TABLE_SANITY_CHECKS, DEFAULT_TABLE_SANITY_CHECKS)) {
-      logWarn = true;
-    }
-    String tableVal = td.getValue(TABLE_SANITY_CHECKS);
-    if (tableVal != null && !Boolean.valueOf(tableVal)) {
-      logWarn = true;
-    }
+    // Log a warning instead of throwing an exception if sanity checks are disabled
+    boolean logWarn = !shouldSanityCheck(conf);
 
     // check max file size
     long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
@@ -107,36 +107,20 @@ public static void sanityCheck(final Configuration c, final TableDescriptor td)
     }
 
     // check that coprocessors and other specified plugin classes can be loaded
-    try {
-      checkClassLoading(conf, td);
-    } catch (Exception ex) {
-      warnOrThrowExceptionForFailure(logWarn, ex.getMessage(), null);
-    }
+    checkClassLoading(conf, td);
 
     if (conf.getBoolean(MASTER_CHECK_COMPRESSION, DEFAULT_MASTER_CHECK_COMPRESSION)) {
       // check compression can be loaded
-      try {
-        checkCompression(td);
-      } catch (IOException e) {
-        warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
-      }
+      checkCompression(conf, td);
     }
 
     if (conf.getBoolean(MASTER_CHECK_ENCRYPTION, DEFAULT_MASTER_CHECK_ENCRYPTION)) {
       // check encryption can be loaded
-      try {
-        checkEncryption(conf, td);
-      } catch (IOException e) {
-        warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
-      }
+      checkEncryption(conf, td);
     }
 
     // Verify compaction policy
-    try {
-      checkCompactionPolicy(conf, td);
-    } catch (IOException e) {
-      warnOrThrowExceptionForFailure(false, e.getMessage(), e);
-    }
+    checkCompactionPolicy(conf, td);
     // check that we have at least 1 CF
     if (td.getColumnFamilyCount() == 0) {
       String message = "Table should have at least one column family.";
@@ -155,6 +139,12 @@ public static void sanityCheck(final Configuration c, final TableDescriptor td)
       warnOrThrowExceptionForFailure(false, "Meta table can't be set as read only.", null);
     }
 
+    // check replication scope
+    checkReplicationScope(conf, td);
+
+    // check bloom filter type
+    checkBloomFilterType(conf, td);
+
     for (ColumnFamilyDescriptor hcd : td.getColumnFamilies()) {
       if (hcd.getTimeToLive() <= 0) {
         String message = "TTL for column family " + hcd.getNameAsString() + " must be positive.";
@@ -185,11 +175,6 @@ public static void sanityCheck(final Configuration c, final TableDescriptor td)
         warnOrThrowExceptionForFailure(logWarn, message, null);
Contributor:
This is the place where we use logWarn in this method? Then let's move the boolean logWarn = !shouldSanityCheck(conf); line here?

Contributor Author:
There are multiple checks based on the logWarn variable before this line (e.g. at lines 92, 106, 127, and 134 above).

Contributor:
Ah, OK, in unchanged code.
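For readers following the thread: warnOrThrowExceptionForFailure is the pre-existing HBASE-13350 helper referenced at the bottom of this file. Its body is not part of this diff; a plausible sketch, inferred from how it is called here:

    // Hedged reconstruction of the helper, not part of this PR's diff:
    private static void warnOrThrowExceptionForFailure(boolean logWarn, String message,
      Exception cause) throws IOException {
      if (!logWarn) {
        // checks enabled: fail the table operation with a non-retriable exception
        throw new DoNotRetryIOException(message, cause);
      }
      // checks disabled: record the problem but let the operation proceed
      LOG.warn(message);
    }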

       }
 
-      // check replication scope
-      checkReplicationScope(hcd);
-      // check bloom filter type
-      checkBloomFilterType(hcd);
-
       // check data replication factor, it can be 0(default value) when user has not explicitly
       // set the value, in this case we use default replication factor set in the file system.
       if (hcd.getDFSReplication() < 0) {
@@ -207,103 +192,144 @@ public static void sanityCheck(final Configuration c, final TableDescriptor td)
     }
   }
 
-  private static void checkReplicationScope(final ColumnFamilyDescriptor cfd) throws IOException {
-    // check replication scope
-    WALProtos.ScopeType scop = WALProtos.ScopeType.valueOf(cfd.getScope());
-    if (scop == null) {
-      String message = "Replication scope for column family " + cfd.getNameAsString() + " is "
-        + cfd.getScope() + " which is invalid.";
-
-      LOG.error(message);
-      throw new DoNotRetryIOException(message);
-    }
-  }
-
-  private static void checkCompactionPolicy(Configuration conf, TableDescriptor td)
+  private static void checkReplicationScope(final Configuration conf, final TableDescriptor td)
     throws IOException {
-    // FIFO compaction has some requirements
-    // Actually FCP ignores periodic major compactions
-    String className = td.getValue(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY);
-    if (className == null) {
-      className = conf.get(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY,
-        ExploringCompactionPolicy.class.getName());
-    }
-
-    int blockingFileCount = HStore.DEFAULT_BLOCKING_STOREFILE_COUNT;
-    String sv = td.getValue(HStore.BLOCKING_STOREFILES_KEY);
-    if (sv != null) {
-      blockingFileCount = Integer.parseInt(sv);
-    } else {
-      blockingFileCount = conf.getInt(HStore.BLOCKING_STOREFILES_KEY, blockingFileCount);
-    }
-
-    for (ColumnFamilyDescriptor hcd : td.getColumnFamilies()) {
-      String compactionPolicy =
-        hcd.getConfigurationValue(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY);
-      if (compactionPolicy == null) {
-        compactionPolicy = className;
-      }
-      if (!compactionPolicy.equals(FIFOCompactionPolicy.class.getName())) {
-        continue;
-      }
-      // FIFOCompaction
-      String message = null;
-
-      // 1. Check TTL
-      if (hcd.getTimeToLive() == ColumnFamilyDescriptorBuilder.DEFAULT_TTL) {
-        message = "Default TTL is not supported for FIFO compaction";
-        throw new IOException(message);
-      }
-
-      // 2. Check min versions
-      if (hcd.getMinVersions() > 0) {
-        message = "MIN_VERSION > 0 is not supported for FIFO compaction";
-        throw new IOException(message);
-      }
-
-      // 3. blocking file count
-      sv = hcd.getConfigurationValue(HStore.BLOCKING_STOREFILES_KEY);
-      if (sv != null) {
-        blockingFileCount = Integer.parseInt(sv);
-      }
-      if (blockingFileCount < 1000) {
-        message =
-          "Blocking file count '" + HStore.BLOCKING_STOREFILES_KEY + "' " + blockingFileCount
-            + " is below recommended minimum of 1000 for column family " + hcd.getNameAsString();
-        throw new IOException(message);
-      }
-    }
-  }
-
-  private static void checkBloomFilterType(ColumnFamilyDescriptor cfd) throws IOException {
-    Configuration conf = new CompoundConfiguration().addStringMap(cfd.getConfiguration());
-    try {
-      BloomFilterUtil.getBloomFilterParam(cfd.getBloomFilterType(), conf);
-    } catch (IllegalArgumentException e) {
-      throw new DoNotRetryIOException("Failed to get bloom filter param", e);
-    }
-  }
-
-  public static void checkCompression(final TableDescriptor td) throws IOException {
-    for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
-      CompressionTest.testCompression(cfd.getCompressionType());
-      CompressionTest.testCompression(cfd.getCompactionCompressionType());
-      CompressionTest.testCompression(cfd.getMajorCompactionCompressionType());
-      CompressionTest.testCompression(cfd.getMinorCompactionCompressionType());
+    // Log a warning instead of throwing an exception if sanity checks are disabled
+    boolean logWarn = !shouldSanityCheck(conf);
+    try {
+      for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
+        // check replication scope
+        WALProtos.ScopeType scop = WALProtos.ScopeType.valueOf(cfd.getScope());
+        if (scop == null) {
+          String message = "Replication scope for column family " + cfd.getNameAsString() + " is "
+            + cfd.getScope() + " which is invalid.";
+
+          throw new DoNotRetryIOException(message);
+        }
+      }
+    } catch (IOException e) {
+      warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
+    }
+  }
+
+  private static void checkCompactionPolicy(final Configuration conf, final TableDescriptor td)
+    throws IOException {
+    try {
+      // FIFO compaction has some requirements
+      // Actually FCP ignores periodic major compactions
+      String className = td.getValue(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY);
+      if (className == null) {
+        className = conf.get(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY,
+          ExploringCompactionPolicy.class.getName());
+      }
+
+      int blockingFileCount = HStore.DEFAULT_BLOCKING_STOREFILE_COUNT;
+      String sv = td.getValue(HStore.BLOCKING_STOREFILES_KEY);
+      if (sv != null) {
+        blockingFileCount = Integer.parseInt(sv);
+      } else {
+        blockingFileCount = conf.getInt(HStore.BLOCKING_STOREFILES_KEY, blockingFileCount);
+      }
+
+      for (ColumnFamilyDescriptor hcd : td.getColumnFamilies()) {
+        String compactionPolicy =
+          hcd.getConfigurationValue(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY);
+        if (compactionPolicy == null) {
+          compactionPolicy = className;
+        }
+        if (!compactionPolicy.equals(FIFOCompactionPolicy.class.getName())) {
+          continue;
+        }
+        // FIFOCompaction
+        String message = null;
+
+        // 1. Check TTL
+        if (hcd.getTimeToLive() == ColumnFamilyDescriptorBuilder.DEFAULT_TTL) {
+          message = "Default TTL is not supported for FIFO compaction";
+          throw new IOException(message);
+        }
+
+        // 2. Check min versions
+        if (hcd.getMinVersions() > 0) {
+          message = "MIN_VERSION > 0 is not supported for FIFO compaction";
+          throw new IOException(message);
+        }
+
+        // 3. blocking file count
+        sv = hcd.getConfigurationValue(HStore.BLOCKING_STOREFILES_KEY);
+        if (sv != null) {
+          blockingFileCount = Integer.parseInt(sv);
+        }
+        if (blockingFileCount < 1000) {
+          message =
+            "Blocking file count '" + HStore.BLOCKING_STOREFILES_KEY + "' " + blockingFileCount
+              + " is below recommended minimum of 1000 for column family " + hcd.getNameAsString();
+          throw new IOException(message);
+        }
+      }
+    } catch (IOException e) {
+      warnOrThrowExceptionForFailure(false, e.getMessage(), e);
+    }
+  }
+
+  private static void checkBloomFilterType(final Configuration conf, final TableDescriptor td)
+    throws IOException {
+    // Log a warning instead of throwing an exception if sanity checks are disabled
+    boolean logWarn = !shouldSanityCheck(conf);
+    try {
+      for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
+        Configuration cfdConf = new CompoundConfiguration().addStringMap(cfd.getConfiguration());
+        try {
+          BloomFilterUtil.getBloomFilterParam(cfd.getBloomFilterType(), cfdConf);
+        } catch (IllegalArgumentException e) {
+          throw new DoNotRetryIOException("Failed to get bloom filter param", e);
+        }
+      }
+    } catch (IOException e) {
+      warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
+    }
+  }
+
+  public static void checkCompression(final Configuration conf, final TableDescriptor td)
+    throws IOException {
+    // Log a warning instead of throwing an exception if sanity checks are disabled
+    boolean logWarn = !shouldSanityCheck(conf);
+    try {
+      for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
+        CompressionTest.testCompression(cfd.getCompressionType());
+        CompressionTest.testCompression(cfd.getCompactionCompressionType());
+        CompressionTest.testCompression(cfd.getMajorCompactionCompressionType());
+        CompressionTest.testCompression(cfd.getMinorCompactionCompressionType());
+      }
+    } catch (IOException e) {
+      warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
     }
   }
 
   public static void checkEncryption(final Configuration conf, final TableDescriptor td)
     throws IOException {
-    for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
-      EncryptionTest.testEncryption(conf, cfd.getEncryptionType(), cfd.getEncryptionKey());
+    // Log a warning instead of throwing an exception if sanity checks are disabled
+    boolean logWarn = !shouldSanityCheck(conf);
+    try {
+      for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
+        EncryptionTest.testEncryption(conf, cfd.getEncryptionType(), cfd.getEncryptionKey());
+      }
+    } catch (IOException e) {
+      warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
     }
   }
 
   public static void checkClassLoading(final Configuration conf, final TableDescriptor td)
     throws IOException {
-    RegionSplitPolicy.getSplitPolicyClass(td, conf);
-    RegionCoprocessorHost.testTableCoprocessorAttrs(conf, td);
+    // Log a warning instead of throwing an exception if sanity checks are disabled
+    boolean logWarn = !shouldSanityCheck(conf);
+    try {
+      RegionSplitPolicy.getSplitPolicyClass(td, conf);
+      RegionCoprocessorHost.testTableCoprocessorAttrs(conf, td);
+    } catch (Exception e) {
+      warnOrThrowExceptionForFailure(logWarn, e.getMessage(), e);
+    }
   }
 
   // HBASE-13350 - Helper method to log warning on sanity check failures if checks disabled.
TestAssignmentManagerMetrics.java
@@ -74,8 +74,8 @@ public static void startCluster() throws Exception {
     LOG.info("Starting cluster");
     Configuration conf = TEST_UTIL.getConfiguration();
 
-    // Disable sanity check for coprocessor
-    conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, false);
+    // Enable sanity check for coprocessor, so that region reopen fails on the RS
+    conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true);
 
     // set RIT stuck warning threshold to a small value
     conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 20);
@@ -100,6 +100,8 @@ public static void startCluster() throws Exception {
     TEST_UTIL.startMiniCluster(1);
     CLUSTER = TEST_UTIL.getHBaseCluster();
     MASTER = CLUSTER.getMaster();
+    // Disable sanity check for coprocessor, so that modify table runs on the HMaster
+    MASTER.getConfiguration().setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, false);
   }
 
   @AfterClass
@@ -133,7 +135,6 @@ public void testRITAssignmentManagerMetrics() throws Exception {
       amSource);
 
     // alter table with a non-existing coprocessor
-
     TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME)
       .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY))
       .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder("com.foo.FooRegionObserver")
TestOpenRegionFailedMemoryLeak.java
@@ -27,6 +27,7 @@
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtil;
@@ -40,9 +41,11 @@
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
+import org.apache.hadoop.hbase.util.TableDescriptorChecker;
 import org.apache.hadoop.metrics2.MetricsExecutor;
 import org.junit.AfterClass;
 import org.junit.Assert;
+import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -60,6 +63,14 @@ public class TestOpenRegionFailedMemoryLeak {
 
   private static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
 
+  @BeforeClass
+  public static void startCluster() throws Exception {
+    Configuration conf = TEST_UTIL.getConfiguration();
+
+    // Enable sanity check for coprocessor
+    conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true);
Contributor:
Why do we need this change?

Contributor Author:
Oh, it's because this test asserts that region open fails due to an invalid coprocessor (at line 90 below). Before this change that was always the case, since coprocessor loading was validated independently of the sanity checks. After this change the validation is gated on the sanity-check config (which is always false in hbase-server's test hbase-site.xml), so we need to explicitly enable sanity checks in the test setup.

Contributor:
OK, seems to have some side effects. We need to fill in a release note.
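To make the release-note-worthy side effect concrete, a hypothetical example (the class and table names are made up):

    TableDescriptor bad = TableDescriptorBuilder.newBuilder(TableName.valueOf("t"))
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf"))
      .setCoprocessor("com.example.MissingObserver") // class not on the classpath
      .setValue(TableDescriptorChecker.TABLE_SANITY_CHECKS, "false") // opt out per table
      .build();
    // Before this PR: opening a region of this table always failed the class-loading check,
    // even though sanity checks were disabled.
    // After this PR: checkClassLoading() sees the disabled setting via the compound
    // configuration, logs a warning, and the region opens.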

+  }
 
   @AfterClass
   public static void tearDown() throws IOException {
     EnvironmentEdgeManagerTestHelper.reset();