Skip to content

Commit

Permalink
[MRESOLVER-370] Lock factory diagnostic on failure (#299)
Browse files Browse the repository at this point in the history
The change "pulls" all `NamedLock` interface methods "level up" from implementations to `NamedLockSupport`, to be able to augment instances at `NamedLockSupport` level.

If diagnostic enabled:
* factory makes all named lock instances to gather "diag stats" (in memory!)
* on adapter failure, factory will dump out diagnostic state,
* each active lock (non closed) will dump it's state out (per thread with "steps")
* output goes to lock factory INFO logger, potentially swarming console

By default, when "diagnostic" not turned on, no change really.

---

https://issues.apache.org/jira/browse/MRESOLVER-370
  • Loading branch information
cstamas committed Jun 13, 2023
1 parent d7fb513 commit a0d2f38
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ public void acquire(Collection<? extends Artifact> artifacts, Collection<? exten
if (!illegalStateExceptions.isEmpty()) {
IllegalStateException ex = new IllegalStateException("Could not acquire lock(s)");
illegalStateExceptions.forEach(ex::addSuppressed);
throw ex;
throw namedLockFactory.onFailure(ex);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,17 @@ public interface NamedLockFactory {
* Performs a clean shut down of the factory.
*/
void shutdown();

/**
* Method to notify factory about locking failure, to make it possible to provide more (factory specific)
* information about factory state when a locking operation failed. Factory may alter provided failure or
* provide information via some other side effect (for example via logging).
* <p>
* The default implementation merely does what happened before: adds no extra information.
*
* @since TBD
*/
default <E extends Throwable> E onFailure(E failure) {
return failure;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,17 @@ private NoopNamedLock(final String name, final NamedLockFactorySupport factory)
}

@Override
public boolean lockShared(final long time, final TimeUnit unit) {
protected boolean doLockShared(final long time, final TimeUnit unit) {
return true;
}

@Override
public boolean lockExclusively(final long time, final TimeUnit unit) {
protected boolean doLockExclusively(final long time, final TimeUnit unit) {
return true;
}

@Override
public void unlock() {
protected void doUnlock() {
// no-op
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public AdaptedSemaphoreNamedLock(
}

@Override
public boolean lockShared(final long time, final TimeUnit unit) throws InterruptedException {
protected boolean doLockShared(final long time, final TimeUnit unit) throws InterruptedException {
Deque<Integer> perms = threadPerms.get();
if (!perms.isEmpty()) { // we already own shared or exclusive lock
perms.push(NONE);
Expand All @@ -80,7 +80,7 @@ public boolean lockShared(final long time, final TimeUnit unit) throws Interrupt
}

@Override
public boolean lockExclusively(final long time, final TimeUnit unit) throws InterruptedException {
protected boolean doLockExclusively(final long time, final TimeUnit unit) throws InterruptedException {
Deque<Integer> perms = threadPerms.get();
if (!perms.isEmpty()) { // we already own shared or exclusive lock
if (perms.contains(EXCLUSIVE)) {
Expand All @@ -98,7 +98,7 @@ public boolean lockExclusively(final long time, final TimeUnit unit) throws Inte
}

@Override
public void unlock() {
protected void doUnlock() {
Deque<Integer> steps = threadPerms.get();
if (steps.isEmpty()) {
throw new IllegalStateException("Wrong API usage: unlock without lock");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ public FileLockNamedLock(final String name, final FileChannel fileChannel, final
}

@Override
public boolean lockShared(final long time, final TimeUnit unit) throws InterruptedException {
protected boolean doLockShared(final long time, final TimeUnit unit) throws InterruptedException {
return retry(time, unit, RETRY_SLEEP_MILLIS, this::doLockShared, null, false);
}

@Override
public boolean lockExclusively(final long time, final TimeUnit unit) throws InterruptedException {
protected boolean doLockExclusively(final long time, final TimeUnit unit) throws InterruptedException {
return retry(time, unit, RETRY_SLEEP_MILLIS, this::doLockExclusively, null, false);
}

Expand Down Expand Up @@ -169,7 +169,7 @@ private Boolean doLockExclusively() {
}

@Override
public void unlock() {
protected void doUnlock() {
criticalRegion.lock();
try {
Deque<Boolean> steps = threadSteps.computeIfAbsent(Thread.currentThread(), k -> new ArrayDeque<>());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
*/
package org.eclipse.aether.named.support;

import java.util.Deque;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
Expand All @@ -32,12 +35,35 @@
* Support class for {@link NamedLockFactory} implementations providing reference counting.
*/
public abstract class NamedLockFactorySupport implements NamedLockFactory {
/**
* System property key to enable locking diagnostic collection.
*
* @since TBD
*/
private static final boolean DIAGNOSTIC_ENABLED = Boolean.getBoolean("aether.named.diagnostic.enabled");

protected final Logger logger = LoggerFactory.getLogger(getClass());

private final ConcurrentMap<String, NamedLockHolder> locks;

private final boolean diagnosticEnabled;

public NamedLockFactorySupport() {
this(DIAGNOSTIC_ENABLED);
}

public NamedLockFactorySupport(boolean diagnosticEnabled) {
this.locks = new ConcurrentHashMap<>();
this.diagnosticEnabled = diagnosticEnabled;
}

/**
* Returns {@code true} if factory diagnostic collection is enabled.
*
* @since TBD
*/
public boolean isDiagnosticEnabled() {
return diagnosticEnabled;
}

@Override
Expand All @@ -57,6 +83,32 @@ public void shutdown() {
// override if needed
}

@Override
public <E extends Throwable> E onFailure(E failure) {
if (isDiagnosticEnabled()) {
Map<String, NamedLockHolder> locks = new HashMap<>(this.locks); // copy
int activeLocks = locks.size();
logger.info("Diagnostic dump of lock factory");
logger.info("===============================");
logger.info("Implementation: {}", getClass().getName());
logger.info("Active locks: {}", activeLocks);
logger.info("");
if (activeLocks > 0) {
for (Map.Entry<String, NamedLockHolder> entry : locks.entrySet()) {
String name = entry.getKey();
int refCount = entry.getValue().referenceCount.get();
NamedLockSupport lock = entry.getValue().namedLock;
logger.info("Name: {}", name);
logger.info("RefCount: {}", refCount);
Map<Thread, Deque<String>> diag = lock.diagnosticState();
diag.forEach((key, value) -> logger.info(" {} -> {}", key, value));
}
logger.info("");
}
}
return failure;
}

public void closeLock(final String name) {
locks.compute(name, (k, v) -> {
if (v != null && v.decRef() == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
*/
package org.eclipse.aether.named.support;

import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Deque;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;

import org.eclipse.aether.named.NamedLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -32,18 +39,79 @@ public abstract class NamedLockSupport implements NamedLock {

private final NamedLockFactorySupport factory;

private final ConcurrentHashMap<Thread, Deque<String>> diagnosticState; // non-null only if diag enabled

public NamedLockSupport(final String name, final NamedLockFactorySupport factory) {
this.name = name;
this.factory = factory;
this.diagnosticState = factory.isDiagnosticEnabled() ? new ConcurrentHashMap<>() : null;
}

@Override
public String name() {
return name;
}

@Override
public boolean lockShared(long time, TimeUnit unit) throws InterruptedException {
if (diagnosticState != null) {
diagnosticState
.computeIfAbsent(Thread.currentThread(), k -> new ArrayDeque<>())
.push("shared");
}
return doLockShared(time, unit);
}

protected abstract boolean doLockShared(long time, TimeUnit unit) throws InterruptedException;

@Override
public boolean lockExclusively(long time, TimeUnit unit) throws InterruptedException {
if (diagnosticState != null) {
diagnosticState
.computeIfAbsent(Thread.currentThread(), k -> new ArrayDeque<>())
.push("exclusive");
}
return doLockExclusively(time, unit);
}

protected abstract boolean doLockExclusively(long time, TimeUnit unit) throws InterruptedException;

@Override
public void unlock() {
doUnlock();
if (diagnosticState != null) {
diagnosticState
.computeIfAbsent(Thread.currentThread(), k -> new ArrayDeque<>())
.pop();
}
}

protected abstract void doUnlock();

@Override
public void close() {
doClose();
}

protected void doClose() {
factory.closeLock(name);
}

/**
* Returns the diagnostic state (if collected) or empty map, never {@code null}.
*
* @since TBD
*/
public Map<Thread, Deque<String>> diagnosticState() {
if (diagnosticState != null) {
return diagnosticState;
} else {
return Collections.emptyMap();
}
}

@Override
public String toString() {
return getClass().getSimpleName() + "{" + "name='" + name + '\'' + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public ReadWriteLockNamedLock(
}

@Override
public boolean lockShared(final long time, final TimeUnit unit) throws InterruptedException {
protected boolean doLockShared(final long time, final TimeUnit unit) throws InterruptedException {
Deque<Step> steps = threadSteps.get();
if (readWriteLock.readLock().tryLock(time, unit)) {
steps.push(Step.SHARED);
Expand All @@ -63,7 +63,7 @@ public boolean lockShared(final long time, final TimeUnit unit) throws Interrupt
}

@Override
public boolean lockExclusively(final long time, final TimeUnit unit) throws InterruptedException {
protected boolean doLockExclusively(final long time, final TimeUnit unit) throws InterruptedException {
Deque<Step> steps = threadSteps.get();
if (!steps.isEmpty()) { // we already own shared or exclusive lock
if (!steps.contains(Step.EXCLUSIVE)) {
Expand All @@ -78,7 +78,7 @@ public boolean lockExclusively(final long time, final TimeUnit unit) throws Inte
}

@Override
public void unlock() {
protected void doUnlock() {
Deque<Step> steps = threadSteps.get();
if (steps.isEmpty()) {
throw new IllegalStateException("Wrong API usage: unlock without lock");
Expand Down
12 changes: 12 additions & 0 deletions maven-resolver-named-locks/src/site/markdown/index.md.vm
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,15 @@ Out of the box, name mapper implementations are the following:
- `file-gav` implemented in `org.eclipse.aether.internal.impl.synccontext.named.FileGAVNameMapper`.

Note: the `file-gav` name mapper MUST be used with `file-lock` named locking, no other mapper will work with it.

${esc.hash}${esc.hash} Diagnostic collection in case of failures

If you experience locking failures, to get access to full dump (may be huge in case of big builds) on failure,
enable lock diagnostic collection. When enabled, "diagnostic dump" will be emitted by factory
to INFO log (console by default). Please note, that enabling diagnostic dump may increase heap requirement (as
diagnostic is collected in memory).

To enable diagnostic collection, set Java System Property `aether.named.diagnostic.enabled` to `true`.

If diagnostic collection is not enabled, Resolver will "just" fail, telling about failed lock, but not reveal any
information about other active locks and threads.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ protected String lockName() {
return testName.getMethodName();
}

@Test(expected = IllegalStateException.class)
public void testFailure() throws InterruptedException {
// note: set system property "aether.named.diagnostic.enabled" to "true" to have log output
// this test does NOT assert its presence, only the proper flow
Thread t1 = new Thread(() -> {
try {
namedLockFactory.getLock(lockName()).lockShared(1L, TimeUnit.MINUTES);
namedLockFactory.getLock(lockName()).lockShared(1L, TimeUnit.MINUTES);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
});
Thread t2 = new Thread(() -> {
try {
namedLockFactory.getLock(lockName()).lockShared(1L, TimeUnit.MINUTES);
namedLockFactory.getLock(lockName()).lockShared(1L, TimeUnit.MINUTES);
namedLockFactory.getLock(lockName()).lockShared(1L, TimeUnit.MINUTES);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
});
t1.start();
t2.start();
t1.join();
t2.join();
throw namedLockFactory.onFailure(new IllegalStateException("failure"));
}

@Test
public void refCounting() {
final String name = lockName();
Expand Down

0 comments on commit a0d2f38

Please sign in to comment.