New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[JENKINS-38383] Avoid Stackoverflow and full-flowgraph scan when trying to get branch names for log prefixes #91
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,9 +25,8 @@ | |
package org.jenkinsci.plugins.workflow.job; | ||
|
||
import com.google.common.base.Optional; | ||
import com.google.common.cache.Cache; | ||
import com.google.common.cache.CacheBuilder; | ||
import com.google.common.cache.CacheLoader; | ||
import com.google.common.cache.LoadingCache; | ||
import com.google.common.collect.ImmutableSortedSet; | ||
import com.google.common.util.concurrent.FutureCallback; | ||
import com.google.common.util.concurrent.ListenableFuture; | ||
|
@@ -120,10 +119,10 @@ | |
import org.jenkinsci.plugins.workflow.flow.GraphListener; | ||
import org.jenkinsci.plugins.workflow.flow.StashManager; | ||
import org.jenkinsci.plugins.workflow.graph.BlockEndNode; | ||
import org.jenkinsci.plugins.workflow.graph.BlockStartNode; | ||
import org.jenkinsci.plugins.workflow.graph.FlowEndNode; | ||
import org.jenkinsci.plugins.workflow.graph.FlowNode; | ||
import org.jenkinsci.plugins.workflow.job.console.WorkflowConsoleLogger; | ||
import org.jenkinsci.plugins.workflow.job.properties.DurabilityHintJobProperty; | ||
import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException; | ||
import org.jenkinsci.plugins.workflow.steps.StepContext; | ||
import org.jenkinsci.plugins.workflow.steps.StepExecution; | ||
|
@@ -504,7 +503,7 @@ private void copyLogs() { | |
long old = entry.getValue(); | ||
OutputStream logger; | ||
|
||
String prefix = getLogPrefix(node); | ||
String prefix = getBranchName(node); | ||
if (prefix != null) { | ||
logger = new LogLinePrefixOutputFilter(listener.getLogger(), "[" + prefix + "] "); | ||
} else { | ||
|
@@ -559,33 +558,61 @@ private long writeRawLogTo(AnnotatedLargeText<?> text, long start, OutputStream | |
} | ||
|
||
@GuardedBy("completed") | ||
private transient LoadingCache<FlowNode,Optional<String>> logPrefixCache; | ||
private @CheckForNull String getLogPrefix(FlowNode node) { | ||
// TODO could also use FlowScanningUtils.fetchEnclosingBlocks(node).filter(FlowScanningUtils.hasActionPredicate(ThreadNameAction.class)), | ||
// but this would not let us cache intermediate results | ||
private transient Cache<FlowNode,Optional<String>> branchNameCache; // TODO Consider making this a top-level FlowNode API | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See below for why we can't use a LoadingCache anymore. |
||
|
||
private Cache<FlowNode, Optional<String>> getBranchNameCache() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. More for cleanliness than anything else (also reduces the complexity of getBranchName slightly). |
||
synchronized (completed) { | ||
if (logPrefixCache == null) { | ||
logPrefixCache = CacheBuilder.newBuilder().weakKeys().build(new CacheLoader<FlowNode,Optional<String>>() { | ||
@Override public @Nonnull Optional<String> load(FlowNode node) { | ||
if (node instanceof BlockEndNode) { | ||
return Optional.fromNullable(getLogPrefix(((BlockEndNode) node).getStartNode())); | ||
} | ||
ThreadNameAction threadNameAction = node.getAction(ThreadNameAction.class); | ||
if (threadNameAction != null) { | ||
return Optional.of(threadNameAction.getThreadName()); | ||
} | ||
for (FlowNode parent : node.getParents()) { | ||
String prefix = getLogPrefix(parent); | ||
if (prefix != null) { | ||
return Optional.of(prefix); | ||
} | ||
} | ||
return Optional.absent(); | ||
} | ||
}); | ||
if (branchNameCache == null) { | ||
branchNameCache = CacheBuilder.newBuilder().weakKeys().build(); | ||
} | ||
return branchNameCache; | ||
} | ||
} | ||
|
||
private @CheckForNull String getBranchName(FlowNode node) { | ||
Cache<FlowNode, Optional<String>> cache = getBranchNameCache(); | ||
|
||
Optional<String> output = cache.getIfPresent(node); | ||
if (output != null) { | ||
return output.orNull(); | ||
} | ||
|
||
// We must explicitly check for the current node being the start/end of a parallel branch | ||
if (node instanceof BlockEndNode) { | ||
output = Optional.fromNullable(getBranchName(((BlockEndNode) node).getStartNode())); | ||
cache.put(node, output); | ||
return output.orNull(); | ||
} else if (node instanceof BlockStartNode) { // And of course this node might be the start of a parallel branch | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This trivial conditional check should massively reduce the getPersistentAction calls and improve performance a bit. |
||
ThreadNameAction threadNameAction = node.getPersistentAction(ThreadNameAction.class); | ||
if (threadNameAction != null) { | ||
String name = threadNameAction.getThreadName(); | ||
cache.put(node, Optional.of(name)); | ||
return name; | ||
} | ||
} | ||
|
||
// Check parent which will USUALLY result in a cache hit, but improve performance and avoid a stack overflow by not doing recursion | ||
List<FlowNode> parents = node.getParents(); | ||
if (!parents.isEmpty()) { | ||
FlowNode parent = parents.get(0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In practice we will never hit a case where parents > 1, because only the closing block for a Parallel will have > 1 parents, and because that's a BlockEndNode it would be handled above. |
||
output = cache.getIfPresent(parent); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one line is why we can't use a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line is also 100% necessary though, because it lets us still get the full benefits of the prior caching approach, while not triggering unbounded recursive calls if trying to get a prefix for the parent. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why aren't these helpful comments in the code? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rsandell A fair point -- the code comment about avoiding a stack overflow by not using recursion covers why it works the way it does (and should make anybody stop and think before switching to a LoadingCache again). The comment is in the PR itself because I'm explaining WHY the LoadingCache is going away -- so it's really only applicable to the code delta itself. Edit: or more specifically why we can't do something clever with the CacheLoader we had before to work around this. |
||
if (output != null) { | ||
cache.put(node, output); | ||
return output.orNull(); | ||
} | ||
} | ||
|
||
// Fall back to looking for an enclosing parallel branch... but using more efficient APIs and avoiding stack overflows | ||
output = Optional.absent(); | ||
for (BlockStartNode myNode : node.iterateEnclosingBlocks()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the meat of the fix. I'll call it out here in case someone else spots it that we're not consulting the cache here, but that's okay because the cache can't help us anyway -- this logic only runs when we're starting or resuming a build (otherwise we'd have a parent cached). |
||
ThreadNameAction threadNameAction = myNode.getPersistentAction(ThreadNameAction.class); | ||
if (threadNameAction != null) { | ||
output = Optional.of(threadNameAction.getThreadName()); | ||
break; | ||
} | ||
return logPrefixCache.getUnchecked(node).orNull(); | ||
} | ||
cache.put(node, output); | ||
return output.orNull(); | ||
} | ||
|
||
private static final class LogLinePrefixOutputFilter extends LineTransformationOutputStream { | ||
|
@@ -697,6 +724,7 @@ private void finish(@Nonnull Result r, @CheckForNull Throwable t) { | |
listener.closeQuietly(); | ||
} | ||
logsToCopy = null; | ||
branchNameCache = null; | ||
try { | ||
save(); | ||
} catch (Exception x) { | ||
|
@@ -1016,7 +1044,7 @@ private final class GraphL implements GraphListener { | |
|
||
private void logNodeMessage(FlowNode node) { | ||
WorkflowConsoleLogger wfLogger = new WorkflowConsoleLogger(listener); | ||
String prefix = getLogPrefix(node); | ||
String prefix = getBranchName(node); | ||
if (prefix != null) { | ||
wfLogger.log(String.format("[%s] %s", prefix, node.getDisplayFunctionName())); | ||
} else { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cf. jenkinsci/pipeline-plugin#338 & JENKINS-50350