Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow CppLinkAction to inspect input sizes in resource estimation #19203

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/actions:execution_requirements",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/actions:localhost_capacity",
"//src/main/java/com/google/devtools/build/lib/actions:middleman_type",
"//src/main/java/com/google/devtools/build/lib/actions:runfiles_supplier",
Expand Down Expand Up @@ -123,6 +124,7 @@ java_library(
"//src/main/protobuf:failure_details_java_proto",
"//third_party:auto_value",
"//third_party:caffeine",
"//third_party:flogger",
"//third_party:guava",
"//third_party:jsr305",
"//third_party/protobuf:protobuf_java",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
package com.google.devtools.build.lib.rules.cpp;

import static com.google.devtools.build.lib.actions.ActionAnalysisMetadata.mergeMaps;
import static com.google.common.base.Preconditions.checkState;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.flogger.GoogleLogger;
import com.google.devtools.build.lib.actions.AbstractAction;
import com.google.devtools.build.lib.actions.ActionEnvironment;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -38,8 +40,10 @@
import com.google.devtools.build.lib.actions.CommandLines.CommandLineAndParamFileInfo;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.actions.ExecutionRequirements;
import com.google.devtools.build.lib.actions.FileArtifactValue;
import com.google.devtools.build.lib.actions.ResourceSet;
import com.google.devtools.build.lib.actions.SimpleSpawn;
import com.google.devtools.build.lib.actions.SimpleSpawn.LocalResourcesSupplier;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.actions.extra.CppLinkInfo;
Expand All @@ -62,6 +66,7 @@
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.util.ShellEscaper;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -74,6 +79,8 @@
@ThreadCompatible
public final class CppLinkAction extends AbstractAction implements CommandAction {

private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

/**
* An abstraction for creating intermediate and output artifacts for C++ linking.
*
Expand Down Expand Up @@ -328,20 +335,35 @@ ImmutableCollection<Artifact> getLinkstampObjectFileInputs() {
@Override
public ActionResult execute(ActionExecutionContext actionExecutionContext)
throws ActionExecutionException, InterruptedException {
Spawn spawn = createSpawn(actionExecutionContext);
LocalResourcesEstimator localResourcesEstimator =
new LocalResourcesEstimator(
actionExecutionContext,
OS.getCurrent(),
linkCommandLine.getLinkerInputArtifacts());

Spawn spawn = createSpawn(actionExecutionContext, localResourcesEstimator);
try {
ImmutableList<SpawnResult> spawnResult =
ImmutableList<SpawnResult> spawnResults =
actionExecutionContext
.getContext(SpawnStrategyResolver.class)
.exec(spawn, actionExecutionContext);
return ActionResult.create(spawnResult);

checkState(spawnResults.size() == 1, "Link actions are expected to create just one Spawn");
SpawnResult result = spawnResults.get(0);
Long consumedMemoryInKb = result.getMemoryInKb();
if (consumedMemoryInKb != null) {
localResourcesEstimator.logMetrics(consumedMemoryInKb);
}

return ActionResult.create(spawnResults);
} catch (ExecException e) {
throw ActionExecutionException.fromExecException(e, CppLinkAction.this);
}
}

private Spawn createSpawn(ActionExecutionContext actionExecutionContext)
throws ActionExecutionException {
private Spawn createSpawn(
ActionExecutionContext actionExecutionContext,
LocalResourcesEstimator localResourcesEstimator) throws ActionExecutionException {
try {
ArtifactExpander actionContextExpander = actionExecutionContext.getArtifactExpander();
ArtifactExpander expander = actionContextExpander;
Expand All @@ -352,10 +374,7 @@ private Spawn createSpawn(ActionExecutionContext actionExecutionContext)
getExecutionInfo(),
getInputs(),
getOutputs(),
() ->
estimateResourceConsumptionLocal(
OS.getCurrent(),
linkCommandLine.getLinkerInputArtifacts().memoizedFlattenAndGetSize()));
localResourcesEstimator);
} catch (CommandLineExpansionException e) {
String message =
String.format(
Expand Down Expand Up @@ -466,20 +485,118 @@ protected String getRawProgressMessage() {
}

/**
* Estimates resource consumption when this action is executed locally. During investigation we
* found linear dependency between used memory by action and number of inputs. For memory
* estimation we are using form C + K * inputs, where C and K selected in such way, that more than
* 95% of actions used less than C + K * inputs MB of memory during execution.
* Estimates resource consumption when this action is executed locally.
*
* <p>Because resource estimation for linking can be very costly (we need to inspect the size of
* the inputs, which means we have to expand a nested set and stat its files), we memoize those
* metrics. We do this to enable logging details about these computations after the linker has
* run, without having to re-do them.</p>
*/
static ResourceSet estimateResourceConsumptionLocal(OS os, int inputs) {
switch (os) {
case DARWIN:
return ResourceSet.createWithRamCpu(/* memoryMb= */ 15 + 0.05 * inputs, /* cpuUsage= */ 1);
case LINUX:
return ResourceSet.createWithRamCpu(
/* memoryMb= */ Math.max(50, -100 + 0.1 * inputs), /* cpuUsage= */ 1);
default:
return ResourceSet.createWithRamCpu(/* memoryMb= */ 1500 + inputs, /* cpuUsage= */ 1);
@VisibleForTesting
static class LocalResourcesEstimator implements LocalResourcesSupplier {
private final ActionExecutionContext actionExecutionContext;
private final OS os;
private final NestedSet<Artifact> inputs;

/** Container for all lazily-initialized details. */
private static class LazyData {
private int inputsCount;
private long inputsBytes;
private ResourceSet resourceSet;

LazyData(int inputsCount, long inputsBytes, ResourceSet resourceSet) {
lberki marked this conversation as resolved.
Show resolved Hide resolved
this.inputsCount = inputsCount;
this.inputsBytes = inputsBytes;
this.resourceSet = resourceSet;
}
}

private LazyData lazyData = null;

public LocalResourcesEstimator(
ActionExecutionContext actionExecutionContext, OS os, NestedSet<Artifact> inputs) {
this.actionExecutionContext = actionExecutionContext;
this.os = os;
this.inputs = inputs;
}

/** Performs costly computations required to predict linker resources consumption. */
private LazyData doCostlyEstimation() {
int inputsCount = 0;
long inputsBytes = 0;
for (Artifact input : inputs.toList()) {
inputsCount += 1;
try {
FileArtifactValue value =
actionExecutionContext.getInputMetadataProvider().getInputMetadata(input);
if (value != null) {
inputsBytes += value.getSize();
} else {
throw new IOException("no metadata");
}
} catch (IOException e) {
// TODO(https://github.com/bazelbuild/bazel/issues/17368): Propagate as ExecException when
// input sizes are used in the model.
logger.atWarning().log(
"Linker metrics: failed to get size of %s: %s (ignored)", input.getExecPath(), e);
}
}

// TODO(https://github.com/bazelbuild/bazel/issues/17368): Use inputBytes in the computations.
ResourceSet resourceSet;
switch (os) {
case DARWIN:
resourceSet =
ResourceSet.createWithRamCpu(
/* memoryMb= */ 15 + 0.05 * inputsCount, /* cpuUsage= */ 1);
break;
case LINUX:
resourceSet =
ResourceSet.createWithRamCpu(
/* memoryMb= */ Math.max(50, -100 + 0.1 * inputsCount), /* cpuUsage= */ 1);
jmmv marked this conversation as resolved.
Show resolved Hide resolved
break;
default:
resourceSet =
ResourceSet.createWithRamCpu(/* memoryMb= */ 1500 + inputsCount, /* cpuUsage= */ 1);
break;
}

return new LazyData(inputsCount, inputsBytes, resourceSet);
}

@Override
public ResourceSet get() throws ExecException {
if (lazyData == null) {
lazyData = doCostlyEstimation();
}
return lazyData.resourceSet;
}

/**
* Emits a log entry with the linker resource prediction statistics.
*
* <p>This should only be called for spawns where we have actual linker consumption metrics so
* that we can compare those to the prediction. In practice, this means that this can only be
* called for locally-executed linkers.
*
* @param actualMemoryKb memory consumed by the linker in KB
*/
public void logMetrics(long actualMemoryKb) {
if (lazyData == null) {
// We should never have to do this here because, today, the memory consumption numbers in
// actualMemoryKb are only available for locally-executed linkers, which means that get()
// has been called beforehand. But this does not necessarily have to be the case: we can
// imagine a remote spawn runner that does return consumed resources, at which point we
// would get here but get() would not have been called.
lazyData = doCostlyEstimation();
}

logger.atInfo().log(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Late to the party - do you need this log for every link action or is it ok to get a sample? I am asking because this seems to cause a bit of log spam when looking at large builds.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, sorry for the late reply.

Yes, I kinda needed it for every link action. If I look at our build, there are lots and lots of small link actions, but towards the end of the build, there are only a few large ones, each with a different memory "profile". If I were to sample these, I'd lose valuable information at that stage.

Is this still a problem after @lberki 's change to decrease the logging level?

"Linker metrics: inputs_count=%d,inputs_mb=%.2f,estimated_mb=%.2f,consumed_mb=%.2f",
lazyData.inputsCount,
((double) lazyData.inputsBytes) / 1024 / 1024,
lazyData.resourceSet.getMemoryMb(),
((double) actualMemoryKb) / 1024);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ java_test(
"//src/test/java/com/google/devtools/build/lib/testutil:TestUtils",
"//third_party:guava",
"//third_party:junit4",
"//third_party:mockito",
"//third_party:truth",
],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;
import static org.mockito.Mockito.mock;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
Expand All @@ -25,6 +26,7 @@
import com.google.common.primitives.Ints;
import com.google.devtools.build.lib.actions.Action;
import com.google.devtools.build.lib.actions.ActionAnalysisMetadata;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.Artifact.SpecialArtifact;
Expand All @@ -41,6 +43,7 @@
import com.google.devtools.build.lib.analysis.util.ActionTester.ActionCombinationFactory;
import com.google.devtools.build.lib.analysis.util.BuildViewTestCase;
import com.google.devtools.build.lib.cmdline.RepositoryName;
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.packages.RuleClass.ConfiguredTargetFactory.RuleErrorException;
Expand All @@ -49,6 +52,7 @@
import com.google.devtools.build.lib.rules.cpp.CcToolchainFeatures.FeatureConfiguration;
import com.google.devtools.build.lib.rules.cpp.CcToolchainVariables.VariableValue;
import com.google.devtools.build.lib.rules.cpp.CppActionConfigs.CppPlatform;
import com.google.devtools.build.lib.rules.cpp.CppLinkAction.LocalResourcesEstimator;
import com.google.devtools.build.lib.rules.cpp.Link.LinkTargetType;
import com.google.devtools.build.lib.rules.cpp.Link.LinkingMode;
import com.google.devtools.build.lib.rules.cpp.LinkerInputs.LibraryToLink;
Expand Down Expand Up @@ -770,24 +774,53 @@ public void testCommandLineSplittingWithArchiveParamFileFeature_shouldBeOnForArc
assertThat(builder.canSplitCommandLine()).isTrue();
}

private static NestedSet<Artifact> createInputs(RuleContext ruleContext, int count) {
NestedSetBuilder<Artifact> builder = new NestedSetBuilder<>(Order.LINK_ORDER);
for (int i = 0; i < count; i++) {
Artifact artifact =
ActionsTestUtil.createArtifact(
ruleContext.getBinDirectory(), String.format("input-%d", i));
builder.add(artifact);
}
return builder.build();
}

private ResourceSet estimateResourceConsumptionLocal(
RuleContext ruleContext, OS os, int inputsCount) throws Exception {
ActionExecutionContext actionExecutionContext = mock(ActionExecutionContext.class);

NestedSet<Artifact> inputs = createInputs(ruleContext, inputsCount);
try {
LocalResourcesEstimator estimator =
new LocalResourcesEstimator(actionExecutionContext, os, inputs);
return estimator.get();
} finally {
for (Artifact input : inputs.toList()) {
input.getPath().delete();
}
}
}

@Test
public void tesLocalLinkResourceEstimate() {
assertThat(CppLinkAction.estimateResourceConsumptionLocal(OS.DARWIN, 100))
public void testLocalLinkResourceEstimate() throws Exception {
RuleContext ruleContext = createDummyRuleContext();

assertThat(estimateResourceConsumptionLocal(ruleContext, OS.DARWIN, 100))
.isEqualTo(ResourceSet.createWithRamCpu(20, 1));

assertThat(CppLinkAction.estimateResourceConsumptionLocal(OS.DARWIN, 1000))
assertThat(estimateResourceConsumptionLocal(ruleContext, OS.DARWIN, 1000))
.isEqualTo(ResourceSet.createWithRamCpu(65, 1));

assertThat(CppLinkAction.estimateResourceConsumptionLocal(OS.LINUX, 100))
assertThat(estimateResourceConsumptionLocal(ruleContext, OS.LINUX, 100))
.isEqualTo(ResourceSet.createWithRamCpu(50, 1));

assertThat(CppLinkAction.estimateResourceConsumptionLocal(OS.LINUX, 10000))
assertThat(estimateResourceConsumptionLocal(ruleContext, OS.LINUX, 10000))
.isEqualTo(ResourceSet.createWithRamCpu(900, 1));

assertThat(CppLinkAction.estimateResourceConsumptionLocal(OS.WINDOWS, 0))
assertThat(estimateResourceConsumptionLocal(ruleContext, OS.WINDOWS, 0))
.isEqualTo(ResourceSet.createWithRamCpu(1500, 1));

assertThat(CppLinkAction.estimateResourceConsumptionLocal(OS.WINDOWS, 1000))
assertThat(estimateResourceConsumptionLocal(ruleContext, OS.WINDOWS, 1000))
.isEqualTo(ResourceSet.createWithRamCpu(2500, 1));
}

Expand Down