Skip to content
Permalink
Browse files
fix race in DSTTest.testCancel
seeing errors on jenkins server though it runs fine locally;
have patched what looks like one race, but not sure it explains the failure
at https://builds.apache.org/job/incubator-brooklyn-pull-requests/org.apache.brooklyn$brooklyn-core/2440/testReport/junit/org.apache.brooklyn.util.core.task/DynamicSequentialTaskTest/testCancelled/
where we are seeing an extra semaphore at the end of the test;
have added comments and logging in case it is seen again.
(locally all is well.)
  • Loading branch information
ahgittin committed Jan 22, 2016
1 parent b28ba02 commit cfd54d5bc5201f0d8d8cde29ac6893f5a6090947
Showing 3 changed files with 43 additions and 15 deletions.
@@ -67,7 +67,6 @@
import com.google.common.base.CaseFormat;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ExecutionList;
@@ -565,7 +564,15 @@ public boolean cancel(TaskCancellationMode mode) {
int subtasksReallyCancelled=0;

if (task instanceof HasTaskChildren) {
for (Task<?> child: ((HasTaskChildren)task).getChildren()) {
// cancel tasks in reverse order --
// it should be the case that if child1 is cancelled,
// a parentTask should NOT call a subsequent child2,
// but just in case, we cancel child2 first
// NB: DST and others may apply their own recursive cancel behaviour
MutableList<Task<?>> childrenReversed = MutableList.copyOf( ((HasTaskChildren)task).getChildren() );
Collections.reverse(childrenReversed);

for (Task<?> child: childrenReversed) {
if (log.isTraceEnabled()) {
log.trace("Cancelling "+child+" on recursive cancellation of "+task);
}
@@ -298,7 +298,7 @@ public final synchronized boolean cancel(boolean mayInterruptIfRunning) {
public synchronized boolean cancel(TaskCancellationMode mode) {
if (isDone()) return false;
if (log.isTraceEnabled()) {
log.trace("BT cancelling "+this+" mode "+mode);
log.trace("BT cancelling "+this+" mode "+mode+", from thread "+Thread.currentThread());
}
cancelled = true;
doCancel(mode);
@@ -139,6 +139,7 @@ public String call() {
Thread.sleep(duration.toMillisecondsRoundingUp());
}
} catch (InterruptedException e) {
log.info("releasing semaphore on interruption after saying "+message);
cancellations.release();
throw Exceptions.propagate(e);
}
@@ -159,7 +160,8 @@ public Task<String> sayTask(String message) {
}

public Task<String> sayTask(String message, Duration duration, String message2) {
return Tasks.<String>builder().displayName("say:"+message).body(sayCallable(message, duration, message2)).build();
return Tasks.<String>builder().displayName("say:"+message+(duration!=null ? ":wait("+duration+")" : "")+(message2!=null ? ":"+message2 : ""))
.body(sayCallable(message, duration, message2)).build();
}

public <T> Task<T> submitting(final Task<T> task) {
@@ -193,33 +195,52 @@ public void testCancelled() throws Exception {
sayTask("2a", Duration.THIRTY_SECONDS, "2b"),
sayTask("3"));
ec.submit(t);


// wait for 2 to start, saying "2a", and the first interruptible block is when it waits for its 30s
waitForMessages(Predicates.compose(MathPredicates.greaterThanOrEqual(2), CollectionFunctionals.sizeFunction()), TIMEOUT);
Assert.assertEquals(messages, Arrays.asList("1", "2a"));
Time.sleep(Duration.millis(50));

// now cancel, and make sure we get the right behaviour
t.cancel(true);
Assert.assertTrue(t.isDone());
// 2 should get cancelled, and invoke the cancellation semaphore
// 2 should get cancelled, and invoke the cancellation semaphore, but not say 2b
// 3 should get cancelled and not run at all
Assert.assertEquals(messages, Arrays.asList("1", "2a"));

// Need to ensure that 2 has been started; race where we might cancel it before its run method
// is even begun. Hence doing "2a; pause; 2b" where nothing is interruptable before pause.
// cancel(..) currently cancels everything in the tree in the calling thread
// so we could even assert task3.isCancelled() now
// but not sure we will guarantee that for subtasks, so weaker assertion
// that it is eventually cancelled, and that it for sure never starts

// message list is still 1, 2a
Assert.assertEquals(messages, Arrays.asList("1", "2a"));
// And 2 when cancelled should release the semaphore
log.info("testCancelled waiting on semaphore; permits left is "+cancellations.availablePermits());
Assert.assertTrue(cancellations.tryAcquire(10, TimeUnit.SECONDS));
log.info("testCancelled acquired semaphore; permits left is "+cancellations.availablePermits());

Iterator<Task<?>> ci = ((HasTaskChildren)t).getChildren().iterator();
// 1 completed fine
Assert.assertEquals(ci.next().get(), "1");
// 2 is cancelled -- cancelled flag should always be set *before* the interrupt sent
// (and that released the semaphore above, even if thread is not completed, so this should be set)
Task<?> task2 = ci.next();
Assert.assertTrue(task2.isBegun());
Assert.assertTrue(task2.isDone());
Assert.assertTrue(task2.isCancelled());

Task<?> task3 = ci.next();
// 3 is being cancelled in the thread which cancelled 2, and should either be
// *before* 2 was cancelled or *not run* because the parent was cancelled
// so we shouldn't need to wait ... but if we did:
// Asserts.eventually(Suppliers.ofInstance(task3), TaskPredicates.isDone());
Assert.assertTrue(task3.isDone());
Assert.assertTrue(task3.isCancelled());
Assert.assertFalse(task3.isBegun());
Assert.assertTrue(task2.isDone());
Assert.assertTrue(task2.isCancelled());

// but we do _not_ get a mutex from task3 as it does not run (is not interrupted)
// messages unchanged
Assert.assertEquals(messages, Arrays.asList("1", "2a"));
// no further mutexes should be available (ie 3 should not run)
// TODO for some reason this was observed to fail on the jenkins box (2016-01)
// but i can't see why; have added logging in case it happens again
Assert.assertEquals(cancellations.availablePermits(), 0);
}

0 comments on commit cfd54d5

Please sign in to comment.