Skip to content

Commit 0e00d65

Browse files
liyafan82 and julianhyde
authored and committed
[CALCITE-3827] Reduce the time complexity of finding in-edges of a vertex in the graph (Liya Fan)
Add benchmark to measure performance difference; Place inward & outward edges in VertexInfo; Switch algorithms when removing vertices. Based on benchmarking, change threshold to 35%, and convert collection to HashSet; add benchmarking README. (Julian Hyde)
1 parent 650ce46 commit 0e00d65

File tree

5 files changed

+454
-26
lines changed

5 files changed

+454
-26
lines changed

core/src/main/java/org/apache/calcite/util/graph/AttributedDirectedGraph.java

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ public E addEdge(V vertex, V targetVertex, Object... attributes) {
6161
if (info == null) {
6262
throw new IllegalArgumentException("no vertex " + vertex);
6363
}
64-
final VertexInfo<V, E> info2 = vertexMap.get(targetVertex);
65-
if (info2 == null) {
64+
final VertexInfo<V, E> targetInfo = vertexMap.get(targetVertex);
65+
if (targetInfo == null) {
6666
throw new IllegalArgumentException("no vertex " + targetVertex);
6767
}
6868
@SuppressWarnings("unchecked")
@@ -71,6 +71,7 @@ public E addEdge(V vertex, V targetVertex, Object... attributes) {
7171
final E edge = f.createEdge(vertex, targetVertex, attributes);
7272
if (edges.add(edge)) {
7373
info.outEdges.add(edge);
74+
targetInfo.inEdges.add(edge);
7475
return edge;
7576
} else {
7677
return null;
@@ -86,18 +87,31 @@ public Iterable<E> getEdges(V source, final V target) {
8687
/** Removes all edges from a given vertex to another.
8788
* Returns whether any were removed. */
8889
public boolean removeEdge(V source, V target) {
89-
final VertexInfo<V, E> info = vertexMap.get(source);
90-
List<E> outEdges = info.outEdges;
91-
int removeCount = 0;
90+
// remove out edges
91+
final List<E> outEdges = vertexMap.get(source).outEdges;
92+
int removeOutCount = 0;
9293
for (int i = 0, size = outEdges.size(); i < size; i++) {
9394
E edge = outEdges.get(i);
9495
if (edge.target.equals(target)) {
9596
outEdges.remove(i);
9697
edges.remove(edge);
97-
++removeCount;
98+
++removeOutCount;
9899
}
99100
}
100-
return removeCount > 0;
101+
102+
// remove in edges
103+
final List<E> inEdges = vertexMap.get(target).inEdges;
104+
int removeInCount = 0;
105+
for (int i = 0, size = inEdges.size(); i < size; i++) {
106+
E edge = inEdges.get(i);
107+
if (edge.source.equals(source)) {
108+
inEdges.remove(i);
109+
++removeInCount;
110+
}
111+
}
112+
113+
assert removeOutCount == removeInCount;
114+
return removeOutCount > 0;
101115
}
102116

103117
/** Factory for edges that have attributes.

core/src/main/java/org/apache/calcite/util/graph/DefaultDirectedGraph.java

Lines changed: 75 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.ArrayList;
2222
import java.util.Collection;
2323
import java.util.Collections;
24+
import java.util.HashSet;
2425
import java.util.LinkedHashMap;
2526
import java.util.LinkedHashSet;
2627
import java.util.List;
@@ -95,13 +96,14 @@ public E addEdge(V vertex, V targetVertex) {
9596
if (info == null) {
9697
throw new IllegalArgumentException("no vertex " + vertex);
9798
}
98-
final VertexInfo<V, E> info2 = vertexMap.get(targetVertex);
99-
if (info2 == null) {
99+
final VertexInfo<V, E> targetInfo = vertexMap.get(targetVertex);
100+
if (targetInfo == null) {
100101
throw new IllegalArgumentException("no vertex " + targetVertex);
101102
}
102103
final E edge = edgeFactory.createEdge(vertex, targetVertex);
103104
if (edges.add(edge)) {
104105
info.outEdges.add(edge);
106+
targetInfo.inEdges.add(edge);
105107
return edge;
106108
} else {
107109
return null;
@@ -120,28 +122,89 @@ public E getEdge(V source, V target) {
120122
}
121123

122124
public boolean removeEdge(V source, V target) {
123-
final VertexInfo<V, E> info = vertexMap.get(source);
124-
List<E> outEdges = info.outEdges;
125+
// remove out edges
126+
final List<E> outEdges = vertexMap.get(source).outEdges;
127+
boolean outRemoved = false;
125128
for (int i = 0, size = outEdges.size(); i < size; i++) {
126129
E edge = outEdges.get(i);
127130
if (edge.target.equals(target)) {
128131
outEdges.remove(i);
129132
edges.remove(edge);
130-
return true;
133+
outRemoved = true;
134+
break;
135+
}
136+
}
137+
138+
// remove in edges
139+
final List<E> inEdges = vertexMap.get(target).inEdges;
140+
boolean inRemoved = false;
141+
for (int i = 0, size = inEdges.size(); i < size; i++) {
142+
E edge = inEdges.get(i);
143+
if (edge.source.equals(source)) {
144+
inEdges.remove(i);
145+
inRemoved = true;
146+
break;
131147
}
132148
}
133-
return false;
149+
assert outRemoved == inRemoved;
150+
return outRemoved;
134151
}
135152

136153
public Set<V> vertexSet() {
137154
return vertexMap.keySet();
138155
}
139156

140157
public void removeAllVertices(Collection<V> collection) {
158+
// The point at which collection is large enough to make the 'majority'
159+
// algorithm more efficient.
160+
final float threshold = 0.35f;
161+
final int thresholdSize = (int) (vertexMap.size() * threshold);
162+
if (collection.size() > thresholdSize && !(collection instanceof Set)) {
163+
// Convert collection to a set, so that collection.contains() is
164+
// faster. If there are duplicates, collection.size() will get smaller.
165+
collection = new HashSet<>(collection);
166+
}
167+
if (collection.size() > thresholdSize) {
168+
removeMajorityVertices((Set<V>) collection);
169+
} else {
170+
removeMinorityVertices(collection);
171+
}
172+
}
173+
174+
/** Implementation of {@link #removeAllVertices(Collection)} that is efficient
175+
* if {@code collection} is a small fraction of the set of vertices. */
176+
private void removeMinorityVertices(Collection<V> collection) {
177+
for (V v : collection) {
178+
final VertexInfo<V, E> info = vertexMap.get(v);
179+
if (info == null) {
180+
continue;
181+
}
182+
183+
// remove all edges pointing to v
184+
for (E edge : info.inEdges) {
185+
final V source = (V) edge.source;
186+
final VertexInfo<V, E> sourceInfo = vertexMap.get(source);
187+
sourceInfo.outEdges.removeIf(e -> e.target.equals(v));
188+
}
189+
190+
// remove all edges starting from v
191+
for (E edge : info.outEdges) {
192+
final V target = (V) edge.target;
193+
final VertexInfo<V, E> targetInfo = vertexMap.get(target);
194+
targetInfo.inEdges.removeIf(e -> e.source.equals(v));
195+
}
196+
}
141197
vertexMap.keySet().removeAll(collection);
198+
}
199+
200+
/** Implementation of {@link #removeAllVertices(Collection)} that is efficient
201+
* if {@code vertexSet} is a large fraction of the set of vertices in the
202+
* graph. */
203+
private void removeMajorityVertices(Set<V> vertexSet) {
204+
vertexMap.keySet().removeAll(vertexSet);
142205
for (VertexInfo<V, E> info : vertexMap.values()) {
143-
//noinspection SuspiciousMethodCalls
144-
info.outEdges.removeIf(next -> collection.contains(next.target));
206+
info.outEdges.removeIf(e -> vertexSet.contains((V) e.target));
207+
info.inEdges.removeIf(e -> vertexSet.contains((V) e.source));
145208
}
146209
}
147210

@@ -150,15 +213,7 @@ public List<E> getOutwardEdges(V source) {
150213
}
151214

152215
public List<E> getInwardEdges(V target) {
153-
final ArrayList<E> list = new ArrayList<>();
154-
for (VertexInfo<V, E> info : vertexMap.values()) {
155-
for (E edge : info.outEdges) {
156-
if (edge.target.equals(target)) {
157-
list.add(edge);
158-
}
159-
}
160-
}
161-
return list;
216+
return vertexMap.get(target).inEdges;
162217
}
163218

164219
final V source(E edge) {
@@ -172,12 +227,13 @@ final V target(E edge) {
172227
}
173228

174229
/**
175-
* Information about an edge.
230+
* Information about a vertex.
176231
*
177232
* @param <V> Vertex type
178233
* @param <E> Edge type
179234
*/
180235
static class VertexInfo<V, E> {
181-
public List<E> outEdges = new ArrayList<>();
236+
final List<E> outEdges = new ArrayList<>();
237+
final List<E> inEdges = new ArrayList<>();
182238
}
183239
}

core/src/main/java/org/apache/calcite/util/graph/DirectedGraph.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ public interface DirectedGraph<V, E> {
4949

5050
Set<V> vertexSet();
5151

52+
/** Removes from this graph all vertices that are in {@code collection},
53+
* and the edges into and out of those vertices. */
5254
void removeAllVertices(Collection<V> collection);
5355

5456
List<E> getOutwardEdges(V source);

ubenchmark/README.md

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<!--
2+
{% comment %}
3+
Licensed to the Apache Software Foundation (ASF) under one or more
4+
contributor license agreements. See the NOTICE file distributed with
5+
this work for additional information regarding copyright ownership.
6+
The ASF licenses this file to you under the Apache License, Version 2.0
7+
(the "License"); you may not use this file except in compliance with
8+
the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
{% endcomment %}
18+
-->
19+
# Micro-benchmarks
20+
21+
This directory, `ubenchmark`, contains micro-benchmarks written using
22+
the [jmh](https://openjdk.java.net/projects/code-tools/jmh/) framework.
23+
24+
The benchmarks are tools for development and are not distributed as
25+
Calcite artifacts. (Besides, jmh's license does not allow that.)
26+
27+
## Running all benchmarks from the command line
28+
29+
To run all benchmarks:
30+
31+
```bash
32+
$ cd calcite
33+
$ ./gradlew :ubenchmark:jmh
34+
```
35+
36+
## Running one benchmark from the command line
37+
38+
To run just one benchmark, modify `ubenchmark/build.gradle.kts` and add the
39+
following `jmh` configuration block:
40+
41+
```kotlin
42+
jmh {
43+
include = listOf("removeAllVertices.*Benchmark")
44+
}
45+
```
46+
47+
and run
48+
49+
```bash
50+
$ ./gradlew :ubenchmark:jmh
51+
```
52+
53+
as before. In this case, `removeAllVertices.*Benchmark` is a
54+
regular expression that matches a few methods -- benchmarks -- in
55+
`class DefaultDirectedGraphBenchmark`.
56+
57+
The `jmh-gradle-plugin` has
58+
[many other options](https://github.com/melix/jmh-gradle-plugin#configuration-options)
59+
but you will need to translate them from Groovy syntax to our Kotlin syntax.
60+
61+
## Recording results
62+
63+
When you have run the benchmarks, please record them in the relevant JIRA
64+
case and link them here:
65+
66+
* ParserBenchmark:
67+
[459](https://issues.apache.org/jira/browse/CALCITE-459),
68+
[1012](https://issues.apache.org/jira/browse/CALCITE-1012)
69+
* ArrayListTest:
70+
[3878](https://issues.apache.org/jira/browse/CALCITE-3878)
71+
* DefaultDirectedGraphBenchmark:
72+
[3827](https://issues.apache.org/jira/browse/CALCITE-3827)
73+
* RelNodeBenchmark:
74+
[3836](https://issues.apache.org/jira/browse/CALCITE-3836)
75+
* ReflectVisitorDispatcherTest:
76+
[3873](https://issues.apache.org/jira/browse/CALCITE-3873)

0 commit comments

Comments
 (0)