Skip to content
Permalink
Browse files
[NO ISSUE][*DB] Handle unchecked exceptions during global recovery
Change-Id: If4766f783a0e1b398d81681be8bc70b8a507d673
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11046
Reviewed-by: Michael Blow <mblow@apache.org>
Reviewed-by: Ian Maxon <imaxon@uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
  • Loading branch information
mblow committed Apr 14, 2021
1 parent 681cd36 commit 1940f26a56508e06774f98f76d6fb37d65178051
Showing 1 changed file with 20 additions and 22 deletions.
@@ -47,11 +47,9 @@
import org.apache.asterix.metadata.utils.MetadataConstants;
import org.apache.hyracks.api.application.ICCServiceContext;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;
import org.apache.hyracks.util.ExitUtil;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

@@ -95,15 +93,19 @@ public void startGlobalRecovery(ICcApplicationContext appCtx) {
synchronized (this) {
if (!recovering) {
recovering = true;
/**
/*
* Perform recovery on a different thread to avoid deadlocks in
* {@link org.apache.asterix.common.cluster.IClusterStateManager}
*/
serviceCtx.getControllerService().getExecutor().submit(() -> {
try {
recover(appCtx);
} catch (HyracksDataException e) {
LOGGER.log(Level.ERROR, "Global recovery failed. Shutting down...", e);
} catch (Throwable e) {
try {
LOGGER.fatal("Global recovery failed. Shutting down...", e);
} catch (Throwable ignore) {
// ignoring exception trying to log, just do the halt
}
ExitUtil.exit(ExitUtil.EC_FAILED_TO_RECOVER);
}
});
@@ -112,24 +114,20 @@ public void startGlobalRecovery(ICcApplicationContext appCtx) {
}
}

protected void recover(ICcApplicationContext appCtx) throws HyracksDataException {
try {
LOGGER.info("Starting Global Recovery");
MetadataManager.INSTANCE.init();
MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
if (appCtx.getStorageProperties().isStorageGlobalCleanup()) {
int storageGlobalCleanupTimeout = appCtx.getStorageProperties().getStorageGlobalCleanupTimeout();
performGlobalStorageCleanup(mdTxnCtx, storageGlobalCleanupTimeout);
}
mdTxnCtx = doRecovery(appCtx, mdTxnCtx);
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
recoveryCompleted = true;
recovering = false;
LOGGER.info("Global Recovery Completed. Refreshing cluster state...");
appCtx.getClusterStateManager().refreshState();
} catch (Exception e) {
throw HyracksDataException.create(e);
/**
 * Runs the global recovery sequence: initializes the metadata manager, opens a metadata
 * transaction, optionally performs the global storage cleanup, delegates to
 * {@code doRecovery}, commits the resulting transaction, updates the recovery flags and
 * finally asks the cluster state manager to refresh its state.
 *
 * @param appCtx the cluster controller application context providing the storage
 *               properties and the cluster state manager
 * @throws Exception any failure raised by a recovery step; nothing is caught or wrapped
 *                   here, so it propagates unchanged to the caller
 */
protected void recover(ICcApplicationContext appCtx) throws Exception {
    LOGGER.info("Starting Global Recovery");
    MetadataManager.INSTANCE.init();
    MetadataTransactionContext txnCtx = MetadataManager.INSTANCE.beginTransaction();

    // the global storage cleanup only runs when enabled in the storage properties
    if (appCtx.getStorageProperties().isStorageGlobalCleanup()) {
        performGlobalStorageCleanup(txnCtx, appCtx.getStorageProperties().getStorageGlobalCleanupTimeout());
    }

    // doRecovery returns the transaction context that has to be committed
    MetadataTransactionContext recoveredTxnCtx = doRecovery(appCtx, txnCtx);
    MetadataManager.INSTANCE.commitTransaction(recoveredTxnCtx);

    // flags are flipped only after the commit went through
    recoveryCompleted = true;
    recovering = false;

    LOGGER.info("Global Recovery Completed. Refreshing cluster state...");
    appCtx.getClusterStateManager().refreshState();
}

protected void performGlobalStorageCleanup(MetadataTransactionContext mdTxnCtx, int storageGlobalCleanupTimeoutSecs)

0 comments on commit 1940f26

Please sign in to comment.