Skip to content

Commit cb5a033

Browse files
comiuscopybara-github
authored andcommitted
Optimise Depsets in Starlark providers
Add unit tests for provider optimisation On schemaful providers store predicted element classes of a Depset next to the schema and encode the actual Depset with a NestedSet. The prediction is based on the first non-empty Depset in a provider. The optimisation either works or if it doesn't it causes no harm. PiperOrigin-RevId: 511732456 Change-Id: I1ce34d9d117645429759d49a7fb461becdb71aa7
1 parent 318f863 commit cb5a033

File tree

4 files changed

+354
-6
lines changed

4 files changed

+354
-6
lines changed

src/main/java/com/google/devtools/build/lib/collect/nestedset/Depset.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,11 @@ public ElementType getElementType() {
334334
return ElementType.of(elemClass);
335335
}
336336

337+
@Nullable
338+
public Class<?> getElementClass() {
339+
return elemClass;
340+
}
341+
337342
@Override
338343
public String toString() {
339344
return Starlark.repr(this);

src/main/java/com/google/devtools/build/lib/packages/StarlarkInfoWithSchema.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,21 @@
2828
import net.starlark.java.syntax.Location;
2929
import net.starlark.java.syntax.TokenKind;
3030

31-
/** A struct-like Info (provider instance) for providers defined in Starlark that have a schema. */
31+
/**
32+
* A struct-like Info (provider instance) for providers defined in Starlark that have a schema.
33+
*
34+
* <p>Maintainer's note: This class is memory-optimized in a way that can cause profiling
35+
* instability in some pathological cases. See {@link StarlarkProvider#optimizeField} for more
36+
* information.
37+
*/
3238
public class StarlarkInfoWithSchema extends StarlarkInfo {
3339
private final StarlarkProvider provider;
3440

35-
// For each field in provider.getFields the table contains on corresponding position either null
36-
// or a legal Starlark value
41+
// For each field in provider.getFields the table contains on corresponding position either null,
42+
// a legal Starlark value, or an optimized value (see StarlarkProvider#optimizeField).
3743
private final Object[] table;
3844

45+
// `table` elements should already be optimized by caller, see StarlarkProvider#optimizeField
3946
private StarlarkInfoWithSchema(
4047
StarlarkProvider provider, Object[] table, @Nullable Location loc) {
4148
super(loc);
@@ -68,7 +75,7 @@ static StarlarkInfoWithSchema createFromNamedArgs(
6875
"got multiple values for parameter %s in call to instantiate provider %s",
6976
table[i], provider.getPrintableName());
7077
}
71-
valueTable[pos] = table[i + 1];
78+
valueTable[pos] = provider.optimizeField(pos, table[i + 1]);
7279
} else {
7380
if (unexpected == null) {
7481
unexpected = new ArrayList<>();
@@ -106,7 +113,9 @@ public boolean isImmutable() {
106113
return false;
107114
}
108115
for (int i = 0; i < table.length; i++) {
109-
if (table[i] != null && !Starlark.isImmutable(table[i])) {
116+
if (table[i] != null
117+
&& !(provider.isOptimised(i, table[i]) // optimised fields might not be Starlark values
118+
|| Starlark.isImmutable(table[i]))) {
110119
return false;
111120
}
112121
}
@@ -118,7 +127,7 @@ public boolean isImmutable() {
118127
public Object getValue(String name) {
119128
ImmutableList<String> fields = provider.getFields();
120129
int i = Collections.binarySearch(fields, name);
121-
return i >= 0 ? table[i] : null;
130+
return i >= 0 ? provider.retrieveOptimizedField(i, table[i]) : null;
122131
}
123132

124133
@Nullable

src/main/java/com/google/devtools/build/lib/packages/StarlarkProvider.java

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,16 @@
1818
import com.google.common.base.Preconditions;
1919
import com.google.common.collect.ImmutableList;
2020
import com.google.devtools.build.lib.cmdline.Label;
21+
import com.google.devtools.build.lib.collect.nestedset.Depset;
22+
import com.google.devtools.build.lib.collect.nestedset.Depset.ElementType;
23+
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
2124
import com.google.devtools.build.lib.events.EventHandler;
2225
import com.google.devtools.build.lib.util.Fingerprint;
2326
import com.google.errorprone.annotations.CanIgnoreReturnValue;
2427
import java.util.Collection;
2528
import java.util.Map;
2629
import java.util.Objects;
30+
import java.util.concurrent.atomic.AtomicReferenceArray;
2731
import javax.annotation.Nullable;
2832
import net.starlark.java.eval.Dict;
2933
import net.starlark.java.eval.EvalException;
@@ -69,6 +73,32 @@ public final class StarlarkProvider implements StarlarkCallable, StarlarkExporta
6973
/** Null iff this provider has not yet been exported. Mutated by {@link export}. */
7074
@Nullable private Key key;
7175

76+
/**
77+
* For schemaful providers, an array of metadata concerning depset optimization.
78+
*
79+
* <p>Each index in the array holds an optional (nullable) depset element type. The value at that
80+
* index is initialized to be the element type of the first non-empty Depset to ever be stored in
81+
* the corresponding field from {@link #schema} on any instance of this provider, globally. If no
82+
* depsets (or only empty depsets) are ever stored in a field, the value at its index in this
83+
* array will remain null.
84+
*
85+
* <p>Whenever a field is stored in an instance of this provider type, if the value is a depset
86+
* whose element type matches the one stored in this array, it is optimized by unwrapping it down
87+
* to its {@code NestedSet}. Upon retrieval, the depset wrapper is reconstructed using this saved
88+
* element type.
89+
*
90+
* <p>The optimization may (harmlessly) fail to apply for provider fields that are not strongly
91+
* typed across all instances.
92+
*
93+
* <p>For large builds, this optimization has been observed to save half a percent in retained
94+
* heap.
95+
*
96+
* <p>In the future, the ad hoc heuristic of examining the first stored non-empty depset might be
97+
* replaced by stronger type information in the provider's Starlark declaration. However, this
98+
* optimization would remain relevant for provider declarations that do not supply such type info.
99+
*/
100+
@Nullable private transient AtomicReferenceArray<Class<?>> depsetTypePredictor;
101+
72102
/**
73103
* Returns a new empty builder.
74104
*
@@ -157,6 +187,9 @@ private StarlarkProvider(
157187
this.schema = schema;
158188
this.init = init;
159189
this.key = key;
190+
if (schema != null) {
191+
depsetTypePredictor = new AtomicReferenceArray<>(schema.size());
192+
}
160193
}
161194

162195
private static Object[] toNamedArgs(Object value, String descriptionForError)
@@ -323,6 +356,67 @@ public String toString() {
323356
return Starlark.repr(this);
324357
}
325358

359+
/**
360+
* For schemaful providers, given a value to store in the field identified by {@code index},
361+
* returns a possibly optimized version of the value. The result (optimized or not) should be
362+
* decoded by {@link #retrieveOptimizedField}.
363+
*
364+
* <p>Mutable values are never optimized.
365+
*/
366+
Object optimizeField(int index, Object value) {
367+
if (value instanceof Depset) {
368+
Preconditions.checkArgument(depsetTypePredictor != null);
369+
Depset depset = (Depset) value;
370+
if (depset.isEmpty()) {
371+
// Most empty depsets have the empty (null) type. We can't store this type because it
372+
// would clash with whatever the actual element type is for non-empty depsets in that
373+
// field. So instead just store the optimized (unwrapped) NestedSet without any type
374+
// information, and assume it's the empty type upon retrieval.
375+
//
376+
// This only loses information in the relatively rare case of a native-constructed empty
377+
// depset with a type restriction (e.g. empty set of artifacts). In that scenario, an
378+
// empty depset retrieved from the provider may "incorrectly" allow itself to participate
379+
// in a union with depsets of other types, whereas the original depset would trigger a
380+
// Starlark eval error. This is a user-observable difference but a very minor one; the
381+
// hazard would be logical errors that are masked by the provider machinery but triggered
382+
// by a refactoring of Starlark code. See TODO in Depset#of(Class, NestedSet) for notes
383+
// about eliminating this semantic confusion.
384+
//
385+
// This problem shouldn't arise for non-empty depsets since distinct non-empty element
386+
// types are not compatible with one another (i.e. there's no Depset<Any> schema).
387+
return depset.getSet();
388+
}
389+
Class<?> elementClass = depset.getElementClass();
390+
if (depsetTypePredictor.compareAndExchange(index, null, elementClass) == elementClass) {
391+
return depset.getSet();
392+
}
393+
}
394+
return value;
395+
}
396+
397+
Object retrieveOptimizedField(int index, Object value) {
398+
if (value instanceof NestedSet<?>) {
399+
// We subvert Depset.of()'s static type checking for consistency between the type token and
400+
// NestedSet type. This is safe because these values came from a previous Depset, so we
401+
// already know they're consistent.
402+
@SuppressWarnings("unchecked")
403+
NestedSet<Object> nestedSet = (NestedSet<Object>) value;
404+
if (nestedSet.isEmpty()) {
405+
// This matches empty depsets created in Starlark with `depset()`. For natively created
406+
// empty depsets it may change elementClass to null.
407+
return Depset.of(ElementType.EMPTY, nestedSet);
408+
}
409+
@SuppressWarnings("unchecked") // can't parametrize Class literal by a non-raw type
410+
Depset depset = Depset.of((Class<Object>) depsetTypePredictor.get(index), nestedSet);
411+
return depset;
412+
}
413+
return value;
414+
}
415+
416+
boolean isOptimised(int index, Object value) {
417+
return value instanceof NestedSet<?>;
418+
}
419+
326420
/**
327421
* A serializable representation of Starlark-defined {@link StarlarkProvider} that uniquely
328422
* identifies all {@link StarlarkProvider}s that are exposed to SkyFrame.

0 commit comments

Comments
 (0)