Skip to content

Commit

Permalink
Improve performance and semantics of union of Skylark sets
Browse files Browse the repository at this point in the history
== Before this change ==

Previously, if a and b are sets, then a + b created a new set c whose direct and transitive elements were all those of a, and with b appended as an additional transitive element. If on the other hand b is a list instead of a set, then its contents were appended as additional direct elements of c. In both cases, you can think of c as a copy of a that also knows about b.

This copying of a's elements into c can lead to accumulation when you do it repeatedly, e.g. x += y in a loop. Each union can take O(n) time so you get O(n^2) time overall. Nested set union is supposed to be O(1) time per operation and O(n) time overall.

It also leads to surprising iteration orders. If you do a + b + c + d (left-associative), where each one is a set, then when you do a post-order traversal you get the elements of b, c, d, and a, in that order. This is because b, c, and d each get appended as transitive children of the copies of a.

== After this change ==

If a and b are sets, then a + b returns a new set c with a and b as its two transitive children. If b is a list, then c has a as its only transitive child and b's elements as its only direct elements. This is straightforward, O(1), and avoids the problem with the confusing order.

It is implemented by removing the items/transitiveItems fields and just relying on NestedSetBuilder.

RELNOTES[INC]: (Skylark) Set union is now O(1). As a side-effect, the iteration order of sets produced by union has changed. "print(set([1]) + set([2]) + set([3]))" will now give back the order 1, 2, 3 instead of 2, 3, 1.

--
PiperOrigin-RevId: 143972165
MOS_MIGRATED_REVID=143972165
  • Loading branch information
brandjon authored and hlopko committed Jan 10, 2017
1 parent 8d487c5 commit 0d1dc55
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 174 deletions.
Expand Up @@ -24,12 +24,9 @@
import com.google.devtools.build.lib.skylarkinterface.SkylarkModuleCategory;
import com.google.devtools.build.lib.skylarkinterface.SkylarkValue;
import com.google.devtools.build.lib.util.Preconditions;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;

/** A generic type safe NestedSet wrapper for Skylark. */
@SkylarkModule(
Expand Down Expand Up @@ -85,47 +82,41 @@ public final class SkylarkNestedSet implements Iterable<Object>, SkylarkValue, S

private final SkylarkType contentType;
private final NestedSet<?> set;
@Nullable
private final List<Object> items;
@Nullable
private final List<NestedSet> transitiveItems;

public SkylarkNestedSet(Order order, Object item, Location loc) throws EvalException {
this(order, SkylarkType.TOP, item, loc, null);
this(SkylarkType.TOP, item, loc, new NestedSetBuilder<Object>(order));
}

public SkylarkNestedSet(SkylarkNestedSet left, Object right, Location loc) throws EvalException {
this(left.set.getOrder(), left.contentType, right, loc, left);
this(
left.contentType,
right,
loc,
new NestedSetBuilder<Object>(left.getOrder()).addTransitive(left.set));
}

// This is safe because of the type checking
@SuppressWarnings("unchecked")
private SkylarkNestedSet(Order order, SkylarkType contentType, Object item, Location loc,
@Nullable SkylarkNestedSet left) throws EvalException {

ArrayList<Object> items = new ArrayList<>();
ArrayList<NestedSet> transitiveItems = new ArrayList<>();
if (left != null) {
if (left.items == null) { // SkylarkSet created from native NestedSet
transitiveItems.add(left.set);
} else { // Preserving the left-to-right addition order.
items.addAll(left.items);
transitiveItems.addAll(left.transitiveItems);
}
}
private SkylarkNestedSet(
SkylarkType contentType, Object item, Location loc, NestedSetBuilder setBuilder)
throws EvalException {
// Adding the item
if (item instanceof SkylarkNestedSet) {
SkylarkNestedSet nestedSet = (SkylarkNestedSet) item;
if (!nestedSet.isEmpty()) {
contentType = checkType(contentType, nestedSet.contentType, loc);
transitiveItems.add(nestedSet.set);
try {
setBuilder.addTransitive((NestedSet<Object>) nestedSet.set);
} catch (IllegalStateException e) {
// Order mismatch between item and setBuilder.
throw new EvalException(loc, e.getMessage());
}
}
} else if (item instanceof SkylarkList) {
// TODO(bazel-team): we should check ImmutableList here but it screws up genrule at line 43
for (Object object : (SkylarkList) item) {
contentType = checkType(contentType, SkylarkType.of(object.getClass()), loc);
checkImmutable(object, loc);
items.add(object);
setBuilder.add(object);
}
} else {
throw new EvalException(
Expand All @@ -134,20 +125,7 @@ private SkylarkNestedSet(Order order, SkylarkType contentType, Object item, Loca
"cannot add value of type '%s' to a depset", EvalUtils.getDataTypeName(item)));
}
this.contentType = Preconditions.checkNotNull(contentType, "type cannot be null");

// Initializing the real nested set
NestedSetBuilder<Object> builder = new NestedSetBuilder<>(order);
builder.addAll(items);
try {
for (NestedSet<?> nestedSet : transitiveItems) {
builder.addTransitive(nestedSet);
}
} catch (IllegalStateException e) {
throw new EvalException(loc, e.getMessage());
}
this.set = builder.build();
this.items = ImmutableList.copyOf(items);
this.transitiveItems = ImmutableList.copyOf(transitiveItems);
this.set = setBuilder.build();
}

/**
Expand All @@ -172,8 +150,6 @@ public static <T> SkylarkNestedSet of(Class<T> contentType, NestedSet<T> set) {
// This is here for the sake of FuncallExpression.
this.contentType = Preconditions.checkNotNull(contentType, "type cannot be null");
this.set = Preconditions.checkNotNull(set, "set cannot be null");
this.items = null;
this.transitiveItems = null;
}

/**
Expand Down
Expand Up @@ -871,7 +871,7 @@ public void testInSet() throws Exception {
@Test
public void testUnionSet() throws Exception {
new SkylarkTest()
.testStatement("str(depset([1, 3]) | depset([1, 2]))", "set([1, 2, 3])")
.testStatement("str(depset([1, 3]) | depset([1, 2]))", "set([1, 3, 2])")
.testStatement("str(depset([1, 2]) | [1, 3])", "set([1, 2, 3])")
.testIfExactError("unsupported operand type(s) for |: 'int' and 'int'", "2 | 4");
}
Expand Down

0 comments on commit 0d1dc55

Please sign in to comment.