Skip to content

Commit

Permalink
HBASE-13993 WALProcedureStore fencing is not effective if new WAL rolls
Browse files Browse the repository at this point in the history
  • Loading branch information
enis committed Jul 10, 2015
1 parent bf5b75d commit c16bbf4
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,22 @@ private long getMillisFromLastRoll() {
}

/**
 * Rolls the writer to the next log id ({@code flushLogId + 1}) and then verifies that no
 * newer log file exists, so that a store which lost ownership stops writing.
 *
 * NOTE(review): this text is a diff rendered without +/- markers. The bare
 * {@code return rollWriter(flushLogId + 1);} directly below appears to be the removed
 * pre-change body; the statements after it are its replacement.
 *
 * @return {@code false} if {@code rollWriter(long)} returned {@code false}, or if
 *         {@code getMaxLogId(getLogFiles())} is greater than {@code flushLogId} (in which case
 *         the file of the last entry in {@code logs} is removed first); {@code true} otherwise
 * @throws IOException declared by this method; presumably propagated from the roll and the
 *         log-file listing -- confirm against those helpers
 */
protected boolean rollWriter() throws IOException {
return rollWriter(flushLogId + 1);
// Create new state-log
if (!rollWriter(flushLogId + 1)) {
// NOTE(review): the id requested above was flushLogId + 1, but the message prints
// flushLogId -- confirm which id is meant to be reported.
LOG.warn("someone else has already created log " + flushLogId);
return false;
}

// We have the lease on the log,
// but we should check if someone else has created new files
if (getMaxLogId(getLogFiles()) > flushLogId) {
// NOTE(review): the check above tolerates maxLogId == flushLogId, while the message
// states a strict "<" -- confirm the intended wording.
LOG.warn("Someone else created new logs. Expected maxLogId < " + flushLogId);
// Drop the file of the most recent entry in logs before reporting failure.
logs.getLast().removeFile();
return false;
}

// We have the lease on the log
return true;
}

private boolean rollWriter(final long logId) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ public static void assertIsAbortException(final ProcedureResult result) {
public static class TestProcedure extends Procedure<Void> {
/** No-argument constructor; its body is empty. */
public TestProcedure() {}

/**
 * Creates a test procedure with the given procedure id and no parent.
 * Delegates to {@code TestProcedure(long, long)} with a parent id of 0; that constructor
 * guards its parent handling with {@code parentId > 0}, so 0 skips that branch.
 *
 * @param procId the procedure id passed on to the two-argument constructor
 */
public TestProcedure(long procId) {
this(procId, 0);
}

public TestProcedure(long procId, long parentId) {
setProcId(procId);
if (parentId > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.procedure2.store.wal.TestWALProcedureStore.TestSequentialProcedure;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
Expand Down Expand Up @@ -166,13 +166,32 @@ public void abortProcess() {
backupStore3Abort.await();
}

@Test(timeout=60000)
/**
 * Tests proper fencing in case the current WAL store is fenced.
 * Runs {@code testWALfencing(false)}, which skips the {@code walRolls}-guarded
 * insert/delete loop on the second store.
 *
 * @throws IOException declared to match {@code testWALfencing(boolean)}
 */
@Test
public void testWALfencingWithoutWALRolling() throws IOException {
testWALfencing(false);
}

/**
 * Tests proper fencing in case the current WAL store does not receive writes until after the
 * new WAL store has done a couple of WAL rolls.
 * Runs {@code testWALfencing(true)}, which executes the {@code walRolls}-guarded
 * insert/delete loop on the second store.
 *
 * @throws IOException declared to match {@code testWALfencing(boolean)}
 */
@Test
public void testWALfencingWithWALRolling() throws IOException {
testWALfencing(true);
}

public void testWALfencing(boolean walRolls) throws IOException {
final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore);

HMaster firstMaster = UTIL.getHBaseCluster().getMaster();

// cause WAL rolling after a delete in WAL:
firstMaster.getConfiguration().setLong("hbase.procedure.store.wal.roll.threshold", 1);

HMaster backupMaster3 = Mockito.mock(HMaster.class);
Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
Expand All @@ -186,20 +205,27 @@ public void testWALfencingWithWALRolling() throws IOException {
procStore2.start(1);
procStore2.recoverLease();

LOG.info("Inserting into second WALProcedureStore");
// insert something to the second store then delete it, causing a WAL roll
Procedure proc2 = new TestSequentialProcedure();
procStore2.insert(proc2, null);
procStore2.rollWriterOrDie();
// Before writing back to the WAL store, optionally do a couple of WAL rolls (which
// causes the old WAL files to be deleted).
if (walRolls) {
LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
for (int i = 0; i < 512; i++) {
// insert something to the second store then delete it, causing a WAL roll(s)
Procedure proc2 = new TestProcedure(i);
procStore2.insert(proc2, null);
procStore2.delete(proc2.getProcId()); // delete the procedure so that the WAL is removed later
}
}

// Now, insert something into the first store; this should fail.
// If the store does a WAL roll and continues with another logId without checking for higher
// logIds, it will incorrectly succeed.
LOG.info("Inserting into first WALProcedureStore");
// insert something to the first store
proc2 = new TestSequentialProcedure();
try {
procStore.insert(proc2, null);
fail("expected RuntimeException 'sync aborted'");
} catch (RuntimeException e) {
LOG.info("got " + e.getMessage());
procStore.insert(new TestProcedure(11), null);
fail("Inserting into Procedure Store should have failed");
} catch (Exception ex) {
LOG.info("Received expected exception", ex);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ public void testWalRollOnLowReplication() throws Exception {
UTIL.getDFSCluster().restartDataNode(dnCount);
for (long i = 2; i < 100; ++i) {
store.insert(new TestProcedure(i, -1), null);
waitForNumReplicas(3);
Thread.sleep(100);
if ((i % 30) == 0) {
LOG.info("Restart Data Node");
Expand All @@ -196,4 +197,18 @@ public void testWalRollOnLowReplication() throws Exception {
}
assertTrue(store.isRunning());
}

/**
 * Blocks until the DFS cluster lists at least {@code numReplicas} data nodes and every data
 * node it lists reports itself as fully started.
 *
 * <p>Both conditions are polled every 100 ms with no upper bound on the wait. The
 * started-check sweep over the data nodes is performed {@code numReplicas} times in a row.
 *
 * @param numReplicas minimum number of data nodes to wait for; also the number of sweeps
 * @throws Exception if a sleep is interrupted, or propagated from the cluster calls
 */
public void waitForNumReplicas(int numReplicas) throws Exception {
  // Phase 1: wait until enough data nodes are listed by the cluster.
  for (;;) {
    if (UTIL.getDFSCluster().getDataNodes().size() >= numReplicas) {
      break;
    }
    Thread.sleep(100);
  }

  // Phase 2: repeatedly sweep the listed data nodes, pausing on each one until it
  // reports that it is fully started.
  int sweep = 0;
  while (sweep < numReplicas) {
    for (DataNode dataNode : UTIL.getDFSCluster().getDataNodes()) {
      while (!dataNode.isDatanodeFullyStarted()) {
        Thread.sleep(100);
      }
    }
    sweep++;
  }
}
}

0 comments on commit c16bbf4

Please sign in to comment.