
- Added target for the nearest neighbor finder tool

- Reduced the default timeout for long-running tests

- Added a jaccardIndex overload for two sets of elements

- Fixed javadoc

- Made class serializable

- Added support for maximum path length

- Reworked to use the new SimpleDependencyPath class

- Revised error message for clarity

- Updated to use new SimpleDependencyPath features

- Updated to use new SimpleDependencyPath features

- Major rewrite for clarity

- Fixed toString()

- Added static methods for testing the category of a particular POS tag

- Implemented missing methods

- Added more imports (not sure why...)

- Added logging

- Added support for shuffling the edges of a graph using a fixed Random for reproducibility

- Multithreaded the edge similarity comparison

- Probably some bug fixes too

- Updated to use IntPair instead of Pair<Integer>

- Updated to use IntSet

- Major overhaul to bring performance in line with the earlier primitive
  collection enhancements

- Probably needs a lot of clean up still

- Updates to use new primitive collections

- Updates to use new primitive collections

- Added support for clustering to a fixed number of clusters

- Added (limited) support for writing in GEXF format

- Added (limited) support for writing in Pajek format

- Made default output verbose

- Removed an extra temporary file that was no longer needed

- Added support for getting the String that backs the document

- Added a new class for testing the association between two terms. The
  BigramExtractor already partially supports this, but the new class can
  limit the number of items being associated, which enables better scaling
  through iterative association testing.

- This class needs a lot of work.

- Added an iterator for the documents in WaCkypedia

- Fixed HTML bug in javadoc

- Added new tool for running the NearestNeighborFinder from the command line

- Fixed javadoc

- Added support for changing the logging level of any logger namespace

- Added new tool for speeding up repeated nearest-neighbor computations by
  partitioning a SemanticSpace into clusters (using K-means) and then only
  searching a subset; see the sketch after this list.

- Added support for reading and writing to streams and byte[] arrays

- Updated to be an IntSet

- Added Pair implementation

- Fixed iterator remove bug

- Added more extensive real-world unit tests

- Added unit test details that match the example in the Ahn et al. paper

- Added more tests
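
The nearest-neighbor tool mentioned above routes a query to its closest
K-means centroid and scans only that cluster's vectors. Below is a minimal,
self-contained sketch of that idea in plain Java; it is not the actual
NearestNeighborFinder API, and every class and method name in it is
hypothetical.

import java.util.List;
import java.util.Map;

// Hypothetical illustration of partition-then-search nearest neighbors.
public class PartitionedSearchSketch {

    // Cosine similarity between two equal-length vectors.
    static double cosine(double[] a, double[] b) {
        double dot = 0, na = 0, nb = 0;
        for (int i = 0; i < a.length; ++i) {
            dot += a[i] * b[i];
            na  += a[i] * a[i];
            nb  += b[i] * b[i];
        }
        return (na == 0 || nb == 0) ? 0 : dot / (Math.sqrt(na) * Math.sqrt(nb));
    }

    // Index of the centroid most similar to v.
    static int nearestCentroid(double[] v, double[][] centroids) {
        int best = 0;
        double bestSim = -2;
        for (int c = 0; c < centroids.length; ++c) {
            double sim = cosine(v, centroids[c]);
            if (sim > bestSim) { bestSim = sim; best = c; }
        }
        return best;
    }

    // Returns the word most similar to the query, searching only the
    // partition whose centroid is closest to the query vector.
    static String nearestNeighbor(double[] query,
                                  double[][] centroids,
                                  List<Map<String, double[]>> partitions) {
        Map<String, double[]> partition =
            partitions.get(nearestCentroid(query, centroids));
        String best = null;
        double bestSim = -2;
        for (Map.Entry<String, double[]> e : partition.entrySet()) {
            double sim = cosine(query, e.getValue());
            if (sim > bestSim) { bestSim = sim; best = e.getKey(); }
        }
        return best;
    }
}

The trade-off is the usual one for approximate search: each query touches
only one cluster, so it is much faster on large spaces, but the true nearest
neighbor can be missed if it falls into a different partition.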
commit dc6cdb6cf315d6fdf7ee782ba4dde0da70ea574a 1 parent ec8c85f
David Jurgens authored
Showing with 3,154 additions and 832 deletions.
  1. +23 −5 build.xml
  2. +17 −1 src/edu/ucla/sspace/common/Similarity.java
  3. +4 −1 src/edu/ucla/sspace/common/VectorMapSemanticSpace.java
  4. +37 −16 src/edu/ucla/sspace/dependency/BreadthFirstPathIterator.java
  5. +2 −1  src/edu/ucla/sspace/dependency/DependencyExtractorManager.java
  6. +55 −17 src/edu/ucla/sspace/dependency/DependencyIterator.java
  7. +8 −23 src/edu/ucla/sspace/dependency/FilteredDependencyIterator.java
  8. +118 −85 src/edu/ucla/sspace/dependency/SimpleDependencyPath.java
  9. +31 −0 src/edu/ucla/sspace/dv/PennTags.java
  10. +13 −14 src/edu/ucla/sspace/graph/DirectedMultigraph.java
  11. +3 −0  src/edu/ucla/sspace/graph/GraphIO.java
  12. +80 −6 src/edu/ucla/sspace/graph/Graphs.java
  13. +96 −58 src/edu/ucla/sspace/graph/LinkClustering.java
  14. +6 −6 src/edu/ucla/sspace/graph/SimpleGraphIterator.java
  15. +4 −12 src/edu/ucla/sspace/graph/SparseDirectedTypedEdgeSet.java
  16. +625 −112 src/edu/ucla/sspace/graph/SparseTypedEdgeSet.java
  17. +13 −5 src/edu/ucla/sspace/graph/SubgraphIterator.java
  18. +429 −445 src/edu/ucla/sspace/graph/UndirectedMultigraph.java
  19. +17 −4 src/edu/ucla/sspace/graph/WeightedLinkClustering.java
  20. +261 −0 src/edu/ucla/sspace/graph/io/GexfIO.java
  21. +94 −0 src/edu/ucla/sspace/graph/io/PajekIO.java
  22. +2 −2 src/edu/ucla/sspace/mains/GenericMain.java
  23. +1 −0  src/edu/ucla/sspace/matrix/SvdlibjDriver.java
  24. +12 −0 src/edu/ucla/sspace/text/StringDocument.java
  25. +182 −0 src/edu/ucla/sspace/text/TermAssociationFinder.java
  26. +125 −0 src/edu/ucla/sspace/text/WaCkypediaDocumentIterator.java
  27. +1 −1  src/edu/ucla/sspace/tools/BigramExtractor.java
  28. +182 −0 src/edu/ucla/sspace/tools/NearestNeighborFinderTool.java
  29. +2 −1  src/edu/ucla/sspace/util/Counter.java
  30. +19 −0 src/edu/ucla/sspace/util/LoggerUtil.java
  31. +438 −0 src/edu/ucla/sspace/util/NearestNeighborFinder.java
  32. +72 −6 src/edu/ucla/sspace/util/SerializableUtil.java
  33. +9 −5 src/edu/ucla/sspace/util/primitive/CompactIntSet.java
  34. +67 −0 src/edu/ucla/sspace/util/primitive/IntPair.java
  35. +1 −1  src/edu/ucla/sspace/util/primitive/TroveIntSet.java
  36. +86 −2 test/edu/ucla/sspace/dependency/BreadthFirstPathIteratorTest.java
  37. +12 −0 test/edu/ucla/sspace/graph/LinkClusteringTests.java
  38. +7 −3 test/edu/ucla/sspace/graph/UndirectedMultigraphTests.java
28 build.xml
@@ -204,8 +204,7 @@
</artifact:mvn>
</target>
- <target name="tools" depends="sse-jar,svd-jar,tc-jar">
- </target>
+ <target name="tools" depends="sse-jar,svd-jar,tc-jar,nnf-jar"/>
<!--
**
@@ -217,7 +216,7 @@
<target name="jar" depends="compile">
- <jar destfile="${bin.dir}/sspace-lib.jar" basedir="classes">
+ <jar destfile="${dist.dir}/sspace-lib.jar" basedir="classes">
<include name="**/*.class"/>
<exclude name="jnt/*"/>
<manifest>
@@ -521,6 +520,21 @@
</jar>
</target>
+ <target name="nnf-jar" depends="compile">
+ <jar destfile="${tools.dir}/nnf.jar" basedir="classes">
+ <include name="**/*.class"/>
+ <manifest>
+ <!-- Who is building this jar? -->
+ <attribute name="Built-By" value="${user.name}"/>
+ <!-- Information about the program itself -->
+ <attribute name="Implementation-Vendor" value="AIRhead Research"/>
+ <attribute name="Implementation-Title" value="SVD"/>
+ <attribute name="Implementation-Version" value="${version}"/>
+ <attribute name="Main-Class" value="edu.ucla.sspace.tools.NearestNeighborFinderTool"/>
+ </manifest>
+ </jar>
+ </target>
+
<!--
**
**
@@ -567,11 +581,15 @@
<cobertura-report srcdir="${src.dir}" destdir="${coverage.html.dir}" />
</target>
+ <property name="junit.default.timeout" value="5000" /> <!-- 5 minute default timeout is 300000 -->
+
<target name="test" depends="compile-tests">
<delete dir="reports"/>
<mkdir dir="reports"/>
- <junit printsummary="yes" fork="yes" haltonfailure="no">
+ <junit printsummary="yes" fork="yes" haltonfailure="no" timeout="5000">
+ <!-- <sysproperty key="junit.default.timeout" value="${junit.default.timeout}" />-->
+
<jvmarg value="-Xmx1g"/>
<jvmarg value="-debug"/>
<classpath location="${build.instrumented.dir}"/>
@@ -587,7 +605,7 @@
<include name="**/*Tests.java"/>
</fileset>
</batchtest>
-
+ <!-- -->
</junit>
</target>
18 src/edu/ucla/sspace/common/Similarity.java
@@ -878,6 +878,21 @@ public static double euclideanSimilarity(Vector a, Vector b) {
/**
* Computes the <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard
+ * index</a> of the two sets of elements.
+ */
+ public static double jaccardIndex(Set<?> a, Set<?> b) {
+ int intersection = 0;
+ for (Object o : a) {
+ if (b.contains(o))
+ intersection++;
+ }
+
+ double union = a.size() + b.size() - intersection;
+ return intersection / union;
+ }
+
+ /**
+ * Computes the <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard
* index</a> comparing the similarity both arrays when viewed as sets of
* samples.
*/
@@ -2264,7 +2279,8 @@ else if (a instanceof SparseVector && b instanceof SparseVector) {
}
/**
- * Returns the cosine similarity of the two {@code IntegerVector} instances
+ * Returns the Tanimoto Coefficient of the two {@code IntegerVector}
+ * instances
*
* @throws IllegalArgumentException when the length of the two vectors are
* not the same.
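
A quick usage sketch of the set-based jaccardIndex overload added above; the
wrapper class is illustrative, but the call matches the new method.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import edu.ucla.sspace.common.Similarity;

public class JaccardExample {
    public static void main(String[] args) {
        Set<String> a = new HashSet<String>(Arrays.asList("cat", "dog", "fish"));
        Set<String> b = new HashSet<String>(Arrays.asList("dog", "fish", "bird"));
        // intersection = {dog, fish} (2 elements), union has 4 elements,
        // so the Jaccard index is 2 / 4 = 0.5
        System.out.println(Similarity.jaccardIndex(a, b));
    }
}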
5 src/edu/ucla/sspace/common/VectorMapSemanticSpace.java
@@ -62,7 +62,10 @@
*
* @author Keith Stevens
*/
-public class VectorMapSemanticSpace<T extends Vector> implements SemanticSpace {
+public class VectorMapSemanticSpace<T extends Vector>
+ implements SemanticSpace, java.io.Serializable {
+
+ private static final long serialVersionUID = 1L;
private static final Logger LOGGER =
Logger.getLogger(VectorMapSemanticSpace.class.getName());
53 src/edu/ucla/sspace/dependency/BreadthFirstPathIterator.java
@@ -50,15 +50,38 @@
* The paths that have been expanded from the starting node but have not yet
* been returned.
*/
- protected final Queue<DependencyPath> frontier;
-
+ protected final Queue<SimpleDependencyPath> frontier;
+
+ /**
+ * The maximum length of any path returned by the iterator.
+ */
+ private final int maxPathLength;
+
/**
* Creates a new iterator over all the paths starting at the provided index.
*
* @param startNode the node that will start all the paths to be generated.
*/
public BreadthFirstPathIterator(DependencyTreeNode startNode) {
- frontier = new ArrayDeque<DependencyPath>();
+ this(startNode, Integer.MAX_VALUE);
+ }
+
+ /**
+ * Creates a new iterator over all the paths starting at the provided index
+ * that will only return paths up to the specified maximum length.
+ *
+ * @param startNode the node that will start all the paths to be generated.
+ * @param maxPathLength the maximum path length to return
+ *
+ * @throws IllegalArgumentException if {@maxPathLength} is &lt; 1.
+ */
+ public BreadthFirstPathIterator(DependencyTreeNode startNode,
+ int maxPathLength) {
+ if (maxPathLength < 1)
+ throw new IllegalArgumentException(
+ "Must specify a path length greater than or equal to 1");
+ this.maxPathLength = maxPathLength;
+ frontier = new ArrayDeque<SimpleDependencyPath>();
// Base-case: find all the paths of length 1
for (DependencyRelation rel : startNode.neighbors()) {
@@ -66,10 +89,8 @@ public BreadthFirstPathIterator(DependencyTreeNode startNode) {
// relationship or not. This ensures that the root is always the
// first node in the path and any expansion will continue away from
// the root.
- SimpleDependencyPath p = new SimpleDependencyPath(
- Collections.singletonList(rel),
- rel.headNode().equals(startNode));
- frontier.offer(p);
+ frontier.offer(new SimpleDependencyPath(
+ rel, rel.headNode().equals(startNode)));
}
}
@@ -77,22 +98,22 @@ public BreadthFirstPathIterator(DependencyTreeNode startNode) {
* Expands the breadth-first frontier by adding all the new paths one link
* away to the end of {@code frontier}.
*/
- /* package-private */ void advance(DependencyPath path) {
+ /* package-private */ void advance(SimpleDependencyPath path) {
+ if (path.length() >= maxPathLength)
+ return;
+
// Get the last node and last relation to decide how to expand.
- DependencyTreeNode last = path.last();
- DependencyRelation lastRel = path.lastRelation();
+ DependencyRelation lastRelation = path.lastRelation();
+ DependencyTreeNode last = path.last();
// Expand all of the possible relations from the last node, creating a
// new path for each, except if the relation is the one that generated
// this path.
for (DependencyRelation rel : last.neighbors()) {
// Skip re-adding the current relation
- if (lastRel.equals(rel))
+ if (lastRelation.equals(rel))
continue;
- // Use an extension of the path, rather than having to copy all of
- // the nodes again. This just creates a view of path with rel as
- // the last relation in path
- DependencyPath extended = new ExtendedPathView(path, rel);
+ SimpleDependencyPath extended = path.extend(rel);
frontier.offer(extended);
}
}
@@ -109,7 +130,7 @@ public boolean hasNext() {
* or greater than the previously returned path.
*/
public DependencyPath next() {
- DependencyPath p = frontier.remove();
+ SimpleDependencyPath p = frontier.remove();
// Expand the frontier 1 link starting from the current path
advance(p);
return p;
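
A minimal sketch using the new maximum-path-length constructor shown above;
the wrapper class is illustrative, and obtaining a parsed DependencyTreeNode
is left to the caller.

import edu.ucla.sspace.dependency.BreadthFirstPathIterator;
import edu.ucla.sspace.dependency.DependencyPath;
import edu.ucla.sspace.dependency.DependencyTreeNode;

public class ShortPathsSketch {
    // Prints every dependency path of at most 3 relations rooted at the node.
    public static void printShortPaths(DependencyTreeNode root) {
        BreadthFirstPathIterator it = new BreadthFirstPathIterator(root, 3);
        while (it.hasNext()) {
            DependencyPath p = it.next();
            System.out.println(p); // space-delimited words and relations
        }
    }
}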
3  src/edu/ucla/sspace/dependency/DependencyExtractorManager.java
@@ -123,7 +123,8 @@ public static synchronized DependencyExtractor getExtractor(String name) {
*/
public static synchronized DependencyExtractor getDefaultExtractor() {
if (defaultExtractor == null)
- throw new IllegalStateException("No extractors available");
+ throw new IllegalStateException(
+ "No DependencyExtractors available.");
return defaultExtractor;
}
}
72 src/edu/ucla/sspace/dependency/DependencyIterator.java
@@ -21,6 +21,7 @@
package edu.ucla.sspace.dependency;
+import java.util.ArrayDeque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -38,7 +39,7 @@
*
* Note that this class is <b>NOT</b> thread safe.
*/
-public class DependencyIterator extends BreadthFirstPathIterator {
+public class DependencyIterator implements Iterator<DependencyPath> {
/**
* The maximum length of the returned paths. The length is considedered to
@@ -47,6 +48,12 @@
private final int maxPathLength;
/**
+ * The paths that have been expanded from the starting node but have not yet
+ * been returned.
+ */
+ protected final Queue<SimpleDependencyPath> frontier;
+
+ /**
* The {@link DependencyRelationAcceptor} that validates each link before it is
* traversed and returned as part of a {@link DependencyPath}.
*/
@@ -68,13 +75,24 @@
public DependencyIterator(DependencyTreeNode startNode,
DependencyRelationAcceptor acceptor,
int maxPathLength) {
- super(startNode);
if (maxPathLength < 1)
throw new IllegalArgumentException(
- "Must specify a path length greater than 1");
-
- this.acceptor = acceptor;
+ "Must specify a path length greater than or equal to 1");
this.maxPathLength = maxPathLength;
+ this.acceptor = acceptor;
+ frontier = new ArrayDeque<SimpleDependencyPath>();
+
+ // Base-case: find all the paths of length 1
+ for (DependencyRelation rel : startNode.neighbors()) {
+ // Orient the path depending on whether the root was the head of the
+ // relationship or not. This ensures that the root is always the
+ // first node in the path and any expansion will continue away from
+ // the root.
+ if (acceptor.accept(rel)) {
+ frontier.offer(new SimpleDependencyPath(
+ rel, rel.headNode().equals(startNode)));
+ }
+ }
}
/**
@@ -82,28 +100,48 @@ public DependencyIterator(DependencyTreeNode startNode,
* relations that are shorter than the maximum path length and that are
* accepted by the {@code DependencyRelationAcceptor} used by this instance.
*/
- @Override void advance(DependencyPath path) {
- // Skip processing paths that would exceed the maximum length
- if (path.length() == maxPathLength)
+ void advance(SimpleDependencyPath path) {
+ if (path.length() >= maxPathLength)
return;
// Get the last node and last relation to decide how to expand.
- DependencyTreeNode last = path.last();
- DependencyRelation lastRel = path.lastRelation();
+ DependencyRelation lastRelation = path.lastRelation();
+ DependencyTreeNode last = path.last();
// Expand all of the possible relations from the last node, creating a
// new path for each, except if the relation is the one that generated
// this path.
for (DependencyRelation rel : last.neighbors()) {
- // Skip re-adding the current relation and those relations that do
- // not pass the filter
- if (lastRel.equals(rel) || !acceptor.accept(rel))
+ // Skip re-adding the current relation
+ if (lastRelation.equals(rel) || !acceptor.accept(rel))
continue;
- // Use an extension of the path, rather than having to copy all of
- // the nodes again. This just creates a view of path with rel as
- // the last relation in path
- DependencyPath extended = new ExtendedPathView(path, rel);
+ SimpleDependencyPath extended = path.extend(rel);
frontier.offer(extended);
}
}
+
+ /**
+ * Returns {@code true} if there are still paths to return for the tree.
+ */
+ public boolean hasNext() {
+ return !frontier.isEmpty();
+ }
+
+ /**
+ * Returns the next {@code DependencyPath} in the tree whose length is equal
+ * or greater than the previously returned path.
+ */
+ public DependencyPath next() {
+ SimpleDependencyPath p = frontier.remove();
+ // Expand the frontier 1 link starting from the current path
+ advance(p);
+ return p;
+ }
+
+ /**
+ * Throws an {@code UnsupportedOperationException} if called
+ */
+ public void remove() {
+ throw new UnsupportedOperationException("Removal is not possible");
+ }
}
31 src/edu/ucla/sspace/dependency/FilteredDependencyIterator.java
@@ -42,12 +42,6 @@
public class FilteredDependencyIterator implements Iterator<DependencyPath> {
/**
- * The maximum length of the returned paths. The length is considedered to
- * not include the first term.
- */
- private final int maxPathLength;
-
- /**
* The {@link DependencyPathAcceptor} that validates each link before it is
* traversed and returned as part of a {@link DependencyPath}.
*/
@@ -89,19 +83,15 @@ public FilteredDependencyIterator(DependencyTreeNode startNode,
* @param startNode the node that will start all the paths to be generated.
* @param acceptor The {@link DependencyPathAcceptor} that will validate
* the paths returned by this iterator
- * @param maxPathLength the maximum number of nodes in any path
+ * @param maxPathLength the maximum number of relations in any path
*
* @throws IllegalArgumentException if {@code maxPathLength} is less than 1
*/
public FilteredDependencyIterator(DependencyTreeNode startNode,
DependencyPathAcceptor acceptor,
int maxPathLength) {
- if (maxPathLength < 1)
- throw new IllegalArgumentException(
- "Must specify a path length greater than 1");
- this.iterator = new BreadthFirstPathIterator(startNode);
+ this.iterator = new BreadthFirstPathIterator(startNode, maxPathLength);
this.acceptor = acceptor;
- this.maxPathLength = maxPathLength;
advance();
}
@@ -110,20 +100,15 @@ public FilteredDependencyIterator(DependencyTreeNode startNode,
* the value to {@code null} if no further paths exist.
*/
private void advance() {
- DependencyPath p = null;
+ next = null;
// While the underlying iterator has paths, check whether any are
- // accepted by the filter. If a path is over the maximum path length,
- // break, since no further returned paths will be smaller.
- while (iterator.hasNext()) {
- p = iterator.next();
- if (p.length() > maxPathLength) {
- p = null;
- break;
- } else if (acceptor.accepts(p))
- break;
+ // accepted by the filter.
+ while (iterator.hasNext() && next == null) {
+ DependencyPath p = iterator.next();
+ if (acceptor.accepts(p))
+ next = p;
}
- next = p;
}
/**
203 src/edu/ucla/sspace/dependency/SimpleDependencyPath.java
@@ -28,22 +28,21 @@
import java.util.Iterator;
/**
- * A simple {@link DependencyPath} that is created from a list
+ * A {@link DependencyPath} that supports constant time access to the nodes and
+ * relations that make up its sequence.
*/
public class SimpleDependencyPath implements DependencyPath {
/**
* The list of terms and relations.
*/
- private final List<DependencyRelation> path;
+ final List<DependencyRelation> path;
/**
- * {@code true} if the head of this path is the head node of the first
- * relation. Conversely, if {@code false}, the path begins at the dependent
- * node in the first relation.
+ * The list of terms and relations.
*/
- private final boolean isHeadFirst;
-
+ final List<DependencyTreeNode> nodes;
+
/**
* Creates a {@link SimpleDependencyPath} starting at the head node of the
* first relation in the list.
@@ -60,8 +59,20 @@ public SimpleDependencyPath(List<DependencyRelation> path,
boolean isHeadFirst) {
if (path == null || path.size() == 0)
throw new IllegalArgumentException("Cannot provide empty path");
- this.path = path;
- this.isHeadFirst = isHeadFirst;
+ this.path = new ArrayList<DependencyRelation>(path);
+ this.nodes = new ArrayList<DependencyTreeNode>(path.size() + 1);
+ DependencyTreeNode cur = (isHeadFirst)
+ ? path.get(0).headNode() : path.get(0).dependentNode();
+ nodes.add(cur);
+ for (DependencyRelation r : path) {
+ DependencyTreeNode next = r.headNode();
+ // If the head node is the last node we saw, then the dependent node
+ // must be the next node in the path
+ if (next.equals(cur))
+ next = r.dependentNode();
+ nodes.add(next);
+ next = cur;
+ }
}
/**
@@ -71,94 +82,123 @@ public SimpleDependencyPath(DependencyPath path) {
if (path == null || path.length() == 0)
throw new IllegalArgumentException("Cannot provide empty path");
- // Special case if we're cloning an instances of this class
- if (path instanceof SimpleDependencyPath) {
- SimpleDependencyPath p = (SimpleDependencyPath)path;
- // Copy over the relations
- this.path = new ArrayList<DependencyRelation>(p.path);
- // Ensure the iteration order stays the same.
- this.isHeadFirst = p.isHeadFirst;
+ int size = path.length();
+ this.path = new ArrayList<DependencyRelation>(size);
+ this.nodes = new ArrayList<DependencyTreeNode>(size + 1);
+ DependencyTreeNode cur = path.first();
+ nodes.add(cur);
+ for (DependencyRelation r : path) {
+ this.path.add(r);
+ DependencyTreeNode next = r.headNode();
+ // If the head node is the last node we saw, then the dependent node
+ // must be the next node in the path
+ if (next.equals(cur))
+ next = r.dependentNode();
+ nodes.add(next);
+ next = cur;
+ }
+ }
+
+ /**
+ * Creates a {@link SimpleDependencyPath} from a single relation, optionally
+ * starting at the head node of the relation.
+ */
+ public SimpleDependencyPath(DependencyRelation relation,
+ boolean startFromHead) {
+ this();
+ if (relation == null)
+ throw new IllegalArgumentException("Cannot provide empty path");
+ path.add(relation);
+ if (startFromHead) {
+ nodes.add(relation.headNode());
+ nodes.add(relation.dependentNode());
}
else {
- this.path = new ArrayList<DependencyRelation>(path.length());
- for (DependencyRelation r : path)
- this.path.add(r);
- // Decide whether the provided path starts with the head element of
- // the relation not
- DependencyRelation r = path.firstRelation();
- DependencyTreeNode n = path.first();
- this.isHeadFirst = r.headNode().equals(n);
+ nodes.add(relation.dependentNode());
+ nodes.add(relation.headNode());
}
}
/**
+ * Creates an empty dependency path
+ */
+ public SimpleDependencyPath() {
+ path = new ArrayList<DependencyRelation>();
+ nodes = new ArrayList<DependencyTreeNode>();
+ }
+
+ /**
+ * Returns a copy of this dependency path
+ */
+ public SimpleDependencyPath copy() {
+ SimpleDependencyPath copy = new SimpleDependencyPath();
+ copy.path.addAll(path);
+ copy.nodes.addAll(nodes);
+ return copy;
+ }
+
+ /**
+ * Returns a copy of this dependency path that has the provided related
+ * appended to the end of its path sequence.
+ */
+ public SimpleDependencyPath extend(DependencyRelation relation) {
+ SimpleDependencyPath copy = copy();
+ // Figure out which node is at the end of our path, and then add the new
+ // node to the end of our nodes
+ DependencyTreeNode last = last();
+ copy.nodes.add((relation.headNode().equals(last))
+ ? relation.dependentNode() : relation.headNode());
+ copy.path.add(relation);
+ return copy;
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof DependencyPath) {
+ DependencyPath p = (DependencyPath)o;
+ if (p.length() != length())
+ return false;
+ DependencyTreeNode f = p.first();
+ DependencyTreeNode n = first();
+ if (!(f == n || (f != null && f.equals(n))))
+ return false;
+ Iterator<DependencyRelation> it1 = iterator();
+ Iterator<DependencyRelation> it2 = p.iterator();
+ while (it1.hasNext())
+ if (!(it1.next().equals(it2.next())))
+ return false;
+ return true;
+ }
+ return false;
+ }
+
+ /**
* {@inheritDoc}
*/
public DependencyTreeNode first() {
- DependencyRelation r = path.get(0);
- // Check whether the relation path starts at either the head or
- // dependent node
- return (isHeadFirst) ? r.headNode() : r.dependentNode();
+ return (nodes.isEmpty()) ? null : nodes.get(0);
}
/**
* {@inheritDoc}
*/
public DependencyRelation firstRelation() {
- return path.get(0);
+ return (path.isEmpty()) ? null : path.get(0);
}
/**
* {@inheritDoc}
*/
public DependencyTreeNode getNode(int position) {
- if (position < 0 || position > path.size() + 1)
+ if (position < 0 || position >= nodes.size())
throw new IndexOutOfBoundsException("Invalid node: " + position);
- // Special case for getting the very first node.
- if (position == 0)
- return (isHeadFirst)
- ? path.get(0).headNode()
- : path.get(0).dependentNode();
- if (position == 1)
- return (isHeadFirst)
- ? path.get(0).dependentNode()
- : path.get(0).headNode();
- // Special case for if only one relation exists in the path
- // Special case for if only one relation exists in the path
- if (path.size() == 1)
- return ((isHeadFirst && position == 1)
- || (!isHeadFirst && position == 0))
- ? path.get(0).dependentNode()
- : path.get(0).headNode();
- DependencyRelation prev = path.get(position - 2);
- DependencyRelation cur = path.get(position - 1);
- return getNextNode(prev, cur);
- }
-
- /**
- * Given the nodes in the previous relation, determines which of the nodes
- * in the next relation is new and return that. This method provides a way
- * of determine the next node in a path independent of the direction of the
- * path's dependency edges.
- *
- * @param prev the dependency relation that was previously seen in the path
- * @param cur the current dependency relation
- *
- * @return the node in {@code cur} that is not present in {@code prev}
- */
- private DependencyTreeNode getNextNode(DependencyRelation prev,
- DependencyRelation cur) {
- return (prev.headNode().equals(cur.headNode()) ||
- prev.dependentNode().equals(cur.headNode()))
- ? cur.dependentNode()
- : cur.headNode();
+ return nodes.get(position);
}
/**
* {@inheritDoc}
*/
public String getRelation(int position) {
- if (position < 0 || position > (path.size() - 1))
+ if (position < 0 || position >= path.size())
throw new IndexOutOfBoundsException("Invalid relation: " +position);
DependencyRelation r = path.get(position);
return r.relation();
@@ -175,13 +215,7 @@ public String getRelation(int position) {
* {@inheritDoc}
*/
public DependencyTreeNode last() {
- if (path.size() == 1)
- return (isHeadFirst)
- ? path.get(0).dependentNode()
- : path.get(0).headNode();
- DependencyRelation prev = path.get(path.size() - 2);
- DependencyRelation last = path.get(path.size() - 1);
- return getNextNode(prev, last);
+ return nodes.get(nodes.size() - 1);
}
/**
@@ -202,14 +236,13 @@ public int length() {
* Returns the path in order with words and relations space delimited.
*/
public String toString() {
- int size = length();
- StringBuilder sb = new StringBuilder(8 * size);
- sb.append('[');
- for (int i = 0; i < size; ++i) {
- sb.append(getNode(i).word());
- if (i + i < size)
- sb.append(' ').append(getRelation(i)).append(' ');
- }
- return sb.append(']').toString();
+ int size = nodes.size();
+ StringBuilder sb = new StringBuilder(8 * size);
+ sb.append(nodes.get(0).word());
+ for (int i = 1; i < size; ++i)
+ sb.append(' ')
+ .append(path.get(i-1).relation())
+ .append(' ').append(nodes.get(i).word());
+ return sb.toString();
}
}
31 src/edu/ucla/sspace/dv/PennTags.java
@@ -72,4 +72,35 @@
VERB_POS_TAGS.add("VVZ");
}
+ /**
+ * Returns {@code true} if this part of speech tag can be used to label an
+ * adjective in the Penn tag set.
+ */
+ public static boolean isAdjective(String posTag) {
+ return ADJ_POS_TAGS.contains(posTag);
+ }
+
+ /**
+ * Returns {@code true} if this part of speech tag can be used to label an
+ * adverb in the Penn tag set.
+ */
+ public static boolean isAdverb(String posTag) {
+ return ADV_POS_TAGS.contains(posTag);
+ }
+
+ /**
+ * Returns {@code true} if this part of speech tag can be used to label a
+ * noun in the Penn tag set.
+ */
+ public static boolean isNoun(String posTag) {
+ return NOUN_POS_TAGS.contains(posTag);
+ }
+
+ /**
+ * Returns {@code true} if this part of speech tag can be used to label a
+ * verb in the Penn tag set.
+ */
+ public static boolean isVerb(String posTag) {
+ return VERB_POS_TAGS.contains(posTag);
+ }
}
27 src/edu/ucla/sspace/graph/DirectedMultigraph.java
@@ -44,6 +44,7 @@
import edu.ucla.sspace.util.SetDecorator;
import edu.ucla.sspace.util.primitive.IntSet;
+import edu.ucla.sspace.util.primitive.PrimitiveCollections;
import edu.ucla.sspace.util.primitive.TroveIntSet;
import gnu.trove.TDecorators;
@@ -426,11 +427,10 @@ public int outDegree(int vertex) {
* {@inheritDoc}
*/
public IntSet predecessors(int vertex) {
- throw new Error();
-// SparseDirectedTypedEdgeSet<T> edges = vertexToEdges.get(vertex);
-// return (edges == null)
-// ? Collections.<Integer>emptySet()
-// : edges.predecessors();
+ SparseDirectedTypedEdgeSet<T> edges = vertexToEdges.get(vertex);
+ return (edges == null)
+ ? PrimitiveCollections.emptyIntSet()
+ : PrimitiveCollections.unmodifiableSet(edges.predecessors());
}
/**
@@ -537,11 +537,10 @@ public boolean execute(SparseDirectedTypedEdgeSet<T> edges) {
* {@inheritDoc}
*/
public IntSet successors(int vertex) {
-// SparseDirectedTypedEdgeSet<T> edges = vertexToEdges.get(vertex);
-// return (edges == null)
-// ? Collections.<Integer>emptySet()
-// : edges.successors();
- throw new Error();
+ SparseDirectedTypedEdgeSet<T> edges = vertexToEdges.get(vertex);
+ return (edges == null)
+ ? PrimitiveCollections.emptyIntSet()
+ : PrimitiveCollections.unmodifiableSet(edges.successors());
}
/**
@@ -932,10 +931,10 @@ public int degree(int vertex) {
* {@inheritDoc}
*/
public IntSet getNeighbors(int vertex) {
-// if (!vertexSubset.contains(vertex))
-// return Collections.<Integer>emptySet();
-// return new SubgraphNeighborsView(vertex);
- throw new Error();
+ SparseDirectedTypedEdgeSet<T> edges = vertexToEdges.get(vertex);
+ return (edges == null)
+ ? PrimitiveCollections.emptyIntSet()
+ : PrimitiveCollections.unmodifiableSet(edges.connected());
}
/**
3  src/edu/ucla/sspace/graph/GraphIO.java
@@ -21,6 +21,8 @@
package edu.ucla.sspace.graph;
+import java.awt.Color;
+
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
@@ -32,6 +34,7 @@
import java.util.Map;
import java.util.Set;
+import edu.ucla.sspace.util.ColorGenerator;
import edu.ucla.sspace.util.Indexer;
import edu.ucla.sspace.util.LineReader;
import edu.ucla.sspace.util.ObjectIndexer;
86 src/edu/ucla/sspace/graph/Graphs.java
@@ -31,9 +31,13 @@
import java.util.Random;
import java.util.Set;
+import java.util.logging.Logger;
+
import gnu.trove.map.TIntIntMap;
import gnu.trove.map.hash.TIntIntHashMap;
+import static edu.ucla.sspace.util.LoggerUtil.verbose;
+
/**
* A collection of static utility methods for interacting with {@link Graph}
@@ -44,6 +48,9 @@
*/
public final class Graphs {
+ private static final Logger LOGGER =
+ Logger.getLogger(Graphs.class.getName());
+
private Graphs() { }
public static <E extends DirectedEdge> DirectedGraph<E> asDirectedGraph(Graph<E> g) {
@@ -126,7 +133,36 @@ private Graphs() { }
int shufflesPerEdge) {
if (shufflesPerEdge < 1)
throw new IllegalArgumentException("must shuffle at least once");
- return shuffleInternal(g, g.edges(), shufflesPerEdge);
+ return shuffleInternal(g, g.edges(), shufflesPerEdge, new Random());
+ }
+
+ /**
+ * Shuffles the edges of {@code g} while still preserving the <a
+ * href="http://en.wikipedia.org/wiki/Degree_sequence#Degree_sequence">degree
+ * sequence</a> of the graph. Each edge in the graph will attempted to be
+ * conflated with another edge in the graph the specified number of times.
+ * If the edge cannot be swapped (possible due to the new version of the
+ * edge already existing), the attempt fails.
+ *
+ * @param g the graph whose elemets will be shuffled
+ * @param shufflesPerEdge the number of swaps to attempt per edge.
+ * @param rnd the source of randomness used to shuffle the graph's edges
+ *
+ * @return the total number of times an edge's endpoint was swapped with
+ * another edge's endpoint. At its maximum value, this will be
+ * {@code shufflesPerEdge * g.size()} assuming that each swap was
+ * successful. For dense graphs, this return value will be much
+ * less.
+ *
+ * @throws IllegalArgumentException if {@code shufflesPerEdge} is
+ * non-positive
+ */
+ public static <E extends Edge> int shufflePreserve(Graph<E> g,
+ int shufflesPerEdge,
+ Random rnd) {
+ if (shufflesPerEdge < 1)
+ throw new IllegalArgumentException("must shuffle at least once");
+ return shuffleInternal(g, g.edges(), shufflesPerEdge, rnd);
}
/**
@@ -135,7 +171,8 @@ private Graphs() { }
* exist.
*/
private static <E extends Edge> int shuffleInternal(
- Graph<E> g, Set<E> edges, int shufflesPerEdge) {
+ Graph<E> g, Set<E> edges, int shufflesPerEdge,
+ Random rand) {
int totalShuffles = 0;
int origSize = g.size();
int numEdges = edges.size();
@@ -148,10 +185,9 @@ private Graphs() { }
// have to reflectively create an array for its type.
E tmp = edges.iterator().next();
@SuppressWarnings("unchecked")
- E[] edgeArray = (E[])Array.newInstance(tmp.getClass(), 0);
+ E[] edgeArray = (E[])Array.newInstance(tmp.getClass(), 1);
edgeArray = edges.toArray(edgeArray);
- Random rand = new Random();
-
+
for (int i = 0; i < numEdges; ++i) {
for (int swap = 0; swap < shufflesPerEdge; ++swap) {
@@ -232,6 +268,39 @@ else if (swapped1.from() == swapped1.to()
public static <T,E extends TypedEdge<T>> int
shufflePreserveType(Multigraph<T,E> g, int shufflesPerEdge) {
+ return shufflePreserveType(g, shufflesPerEdge, new Random());
+ }
+
+ /**
+ * Shuffles the edges of {@code g} while still preserving the <a
+ * href="http://en.wikipedia.org/wiki/Degree_sequence#Degree_sequence">degree
+ * sequence</a> of the graph and that edges are only swapped with those of
+ * the same type. Each edge in the graph will attempted to be conflated
+ * with another edge in the graph the specified number of times. If the
+ * edge cannot be swapped (possible due to the new version of the edge
+ * already existing), the attempt fails.
+ *
+ * <p> Note that the {@link Multigraph#subview(Set,Set)} method makes it
+ * possilble to shuffle the edges for only a subset of the types in the
+ * multigraph.
+ *
+ * @param g the graph whose elemets will be shuffled
+ * @param shufflesPerEdge the number of swaps to attempt per edge.
+ * @param rnd the source of randomness used to shuffle the graph's edges
+ *
+ * @return the total number of times an edge's endpoint was swapped with
+ * another edge's endpoint. At its maximum value, this will be
+ * {@code shufflesPerEdge * g.size()} assuming that each swap was
+ * successful. For dense graphs, this return value will be much
+ * less.
+ *
+ * @throws IllegalArgumentException if {@code shufflesPerEdge} is
+ * non-positive
+ */
+ public static <T,E extends TypedEdge<T>> int
+ shufflePreserveType(Multigraph<T,E> g, int shufflesPerEdge,
+ Random rnd) {
+
if (shufflesPerEdge < 1)
throw new IllegalArgumentException("must shuffle at least once");
@@ -246,8 +315,12 @@ else if (swapped1.from() == swapped1.to()
// ConcurrentModificationException
Set<T> types = new HashSet<T>(g.edgeTypes());
for (T type : types) {
+ Set<E> edges = g.edges(type);
// Shuffle the edges of the current type only
- totalShuffles += shuffleInternal(g, g.edges(type), shufflesPerEdge);
+ int shuffles = shuffleInternal(g, edges, shufflesPerEdge, rnd);
+ totalShuffles += shuffles;
+ verbose(LOGGER, "Made %d shuffles for %d edges of type %s",
+ shuffles, edges.size(), type);
}
assert order == g.order() : "Changed the number of vertices";
@@ -255,6 +328,7 @@ else if (swapped1.from() == swapped1.to()
return totalShuffles;
}
+
/**
* To-do
*/
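
A minimal sketch of the new Random parameter for reproducible shuffles added
above; the wrapper class, seed, and swap count are illustrative.

import java.util.Random;

import edu.ucla.sspace.graph.Edge;
import edu.ucla.sspace.graph.Graph;
import edu.ucla.sspace.graph.Graphs;

public class ReproducibleShuffleSketch {
    // Shuffles g's edges with a fixed seed so repeated runs of the same
    // experiment produce the same randomized graph.
    public static <E extends Edge> int shuffle(Graph<E> g) {
        Random fixed = new Random(42L);              // fixed seed for reproducibility
        return Graphs.shufflePreserve(g, 3, fixed);  // 3 swap attempts per edge
    }
}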
154 src/edu/ucla/sspace/graph/LinkClustering.java
@@ -325,20 +325,21 @@ public LinkClustering() { }
* O(n<sup>2</sup>) run-time complexity and O(n) space, which is a
* significant savings over running single-linkage with a max-heap.
*/
- private <E extends Edge> MultiMap<Integer,Integer> singleLink(Graph<E> g) {
+ private <E extends Edge> MultiMap<Integer,Integer>
+ singleLink(final Graph<E> g) {
final int numEdges = g.size();
// Index the edges so that we can quickly look up which cluster an edge
// is in
- Indexer<Edge> edgeIndexer = new HashIndexer<Edge>();
+ final Indexer<Edge> edgeIndexer = new HashIndexer<Edge>();
// Keep a simple int->int mapping from each edge's index to (1) the
// cluster its in, (2) the most similar edge to that edge, (3) the
// similarity of the most similar edge.
int[] edgeToCluster = new int[numEdges];
- int[] edgeToMostSim = new int[numEdges];
- double[] edgeToSimOfMostSim = new double[numEdges];
+ final int[] edgeToMostSim = new int[numEdges];
+ final double[] edgeToSimOfMostSim = new double[numEdges];
// Keep track of the vertices in each cluster
IntIntMultiMap clusterToVertices = new IntIntHashMultiMap();
@@ -355,37 +356,55 @@ public LinkClustering() { }
// For each edge, find the most similar cluster updating the relative
// indices of the rowToMostSimilar arrays with the results.
+ Object taskKey = WORK_QUEUE.registerTaskGroup(g.order());
IntIterator iter1 = g.vertices().iterator();
while (iter1.hasNext()) {
final int v1 = iter1.nextInt();
-// System.out.printf("Computing similarities for " +
-// "vertex %d%n", v1);
-
- veryVerbose(LOGGER, "Computing similarities for " +
- "vertex %d", v1);
- IntSet neighbors = g.getNeighbors(v1);
- IntIterator it1 = neighbors.iterator();
- while (it1.hasNext()) {
- int v2 = it1.nextInt();
- IntIterator it2 = neighbors.iterator();
- while (it2.hasNext()) {
- int v3 = it2.nextInt();
- if (v2 == v3)
- break;
- double sim = getConnectionSimilarity(g, v1, v2, v3);
- int e1index = edgeIndexer.index(new SimpleEdge(v1, v2));
- int e2index = edgeIndexer.index(new SimpleEdge(v1, v3));
- if (sim > edgeToSimOfMostSim[e1index]) {
- edgeToSimOfMostSim[e1index] = sim;
- edgeToMostSim[e1index] = e2index;
- }
- if (sim > edgeToSimOfMostSim[e2index]) {
- edgeToSimOfMostSim[e2index] = sim;
- edgeToMostSim[e2index] = e1index;
+ WORK_QUEUE.add(taskKey, new Runnable() {
+ public void run() {
+ veryVerbose(LOGGER, "Computing similarities for " +
+ "vertex %d", v1);
+ IntSet neighbors = g.getNeighbors(v1);
+ IntIterator it1 = neighbors.iterator();
+ while (it1.hasNext()) {
+ int v2 = it1.nextInt();
+ IntIterator it2 = neighbors.iterator();
+ while (it2.hasNext()) {
+ int v3 = it2.nextInt();
+ if (v2 == v3)
+ break;
+ double sim = getConnectionSimilarity(
+ g, v1, v2, v3);
+
+ int e1index = edgeIndexer
+ .index(new SimpleEdge(v1, v2));
+ int e2index = edgeIndexer
+ .index(new SimpleEdge(v1, v3));
+
+ // Lock on the canonical instance of e1 before
+ // updating its similarity values
+ synchronized(edgeIndexer.lookup(e1index)) {
+ if (sim > edgeToSimOfMostSim[e1index]) {
+ edgeToSimOfMostSim[e1index] = sim;
+ edgeToMostSim[e1index] = e2index;
+ }
+ }
+
+ // Lock on the canonical instance of e2 before
+ // updating its similarity values
+ synchronized(edgeIndexer.lookup(e2index)) {
+ if (sim > edgeToSimOfMostSim[e2index]) {
+ edgeToSimOfMostSim[e2index] = sim;
+ edgeToMostSim[e2index] = e1index;
+ }
+ }
+ }
+ }
}
- }
- }
+ });
}
+ WORK_QUEUE.await(taskKey);
+
// Keep track of the size of each cluster so that we can merge the
// smaller into the larger. Each cluster has an initial size of 1
@@ -598,7 +617,7 @@ else if (cId == cluster2index) {
* @param numClusters the number of clusters to produce
*/
private <E extends Edge> MultiMap<Integer,Integer> singleLink(
- Graph<E> g, int numClusters) {
+ final Graph<E> g, int numClusters) {
final int numEdges = g.size();
if (numClusters < 1 || numClusters > numEdges)
@@ -607,14 +626,14 @@ else if (cId == cluster2index) {
// Index the edges so that we can quickly look up which cluster an edge
// is in
- Indexer<Edge> edgeIndexer = new HashIndexer<Edge>();
+ final Indexer<Edge> edgeIndexer = new HashIndexer<Edge>();
// Keep a simple int->int mapping from each edge's index to (1) the
// cluster its in, (2) the most similar edge to that edge, (3) the
// similarity of the most similar edge.
int[] edgeToCluster = new int[numEdges];
- int[] edgeToMostSim = new int[numEdges];
- double[] edgeToSimOfMostSim = new double[numEdges];
+ final int[] edgeToMostSim = new int[numEdges];
+ final double[] edgeToSimOfMostSim = new double[numEdges];
// Keep track of the vertices in each cluster
IntIntMultiMap clusterToVertices = new IntIntHashMultiMap();
@@ -630,35 +649,54 @@ else if (cId == cluster2index) {
// For each edge, find the most similar cluster updating the relative
// indices of the rowToMostSimilar arrays with the results.
+ Object taskKey = WORK_QUEUE.registerTaskGroup(g.order());
IntIterator iter1 = g.vertices().iterator();
while (iter1.hasNext()) {
final int v1 = iter1.nextInt();
- veryVerbose(LOGGER, "Computing similarities for " +
- "vertex %d", v1);
- IntSet neighbors = g.getNeighbors(v1);
- IntIterator it1 = neighbors.iterator();
- while (it1.hasNext()) {
- int v2 = it1.nextInt();
- IntIterator it2 = neighbors.iterator();
- while (it2.hasNext()) {
- int v3 = it2.nextInt();
- if (v2 == v3)
- break;
- double sim = getConnectionSimilarity(g, v1, v2, v3);
- int e1index = edgeIndexer.index(new SimpleEdge(v1, v2));
- if (sim > edgeToSimOfMostSim[e1index]) {
- edgeToSimOfMostSim[e1index] = sim;
- edgeToMostSim[e1index] = e2index;
- }
-
- int e2index = edgeIndexer.index(new SimpleEdge(v1, v3));
- if (sim > edgeToSimOfMostSim[e2index]) {
- edgeToSimOfMostSim[e2index] = sim;
- edgeToMostSim[e2index] = e1index;
+ WORK_QUEUE.add(taskKey, new Runnable() {
+ public void run() {
+ veryVerbose(LOGGER, "Computing similarities for " +
+ "vertex %d", v1);
+ IntSet neighbors = g.getNeighbors(v1);
+ IntIterator it1 = neighbors.iterator();
+ while (it1.hasNext()) {
+ int v2 = it1.nextInt();
+ IntIterator it2 = neighbors.iterator();
+ while (it2.hasNext()) {
+ int v3 = it2.nextInt();
+ if (v2 == v3)
+ break;
+ double sim = getConnectionSimilarity(
+ g, v1, v2, v3);
+
+ int e1index = edgeIndexer
+ .index(new SimpleEdge(v1, v2));
+ int e2index = edgeIndexer
+ .index(new SimpleEdge(v1, v3));
+
+ // Lock on the canonical instance of e1 before
+ // updating its similarity values
+ synchronized(edgeIndexer.lookup(e1index)) {
+ if (sim > edgeToSimOfMostSim[e1index]) {
+ edgeToSimOfMostSim[e1index] = sim;
+ edgeToMostSim[e1index] = e2index;
+ }
+ }
+
+ // Lock on the canonical instance of e2 before
+ // updating its similarity values
+ synchronized(edgeIndexer.lookup(e2index)) {
+ if (sim > edgeToSimOfMostSim[e2index]) {
+ edgeToSimOfMostSim[e2index] = sim;
+ edgeToMostSim[e2index] = e1index;
+ }
+ }
+ }
+ }
}
- }
- }
+ });
}
+ WORK_QUEUE.await(taskKey);
// Keep track of the size of each cluster so that we can merge the
// smaller into the larger. Each cluster has an initial size of 1
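
The per-edge locking used in the multithreaded similarity pass above can be
illustrated outside the work-queue machinery. The sketch below uses plain
java.util.concurrent rather than the project's WORK_QUEUE, and all names in
it are hypothetical; the point is that each worker synchronizes on a
canonical per-edge lock object instead of one global lock, so updates to
different edges do not contend.

import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class PerEdgeLockingSketch {
    public static void main(String[] args) throws InterruptedException {
        final int numEdges = 1000;
        final double[] bestSim = new double[numEdges];
        final int[] bestEdge = new int[numEdges];
        final Object[] locks = new Object[numEdges];
        for (int i = 0; i < numEdges; ++i)
            locks[i] = new Object();

        ExecutorService exec = Executors.newFixedThreadPool(4);
        for (int t = 0; t < 4; ++t) {
            exec.submit(new Runnable() {
                public void run() {
                    // Stand-in for computed edge-pair similarities
                    Random rnd = new Random();
                    for (int i = 0; i < 100000; ++i) {
                        int e1 = rnd.nextInt(numEdges);
                        int e2 = rnd.nextInt(numEdges);
                        double sim = rnd.nextDouble();
                        // Lock only this edge's state before updating it
                        synchronized (locks[e1]) {
                            if (sim > bestSim[e1]) {
                                bestSim[e1] = sim;
                                bestEdge[e1] = e2;
                            }
                        }
                    }
                }
            });
        }
        exec.shutdown();
        exec.awaitTermination(1, TimeUnit.MINUTES);
        System.out.println("done; bestEdge[0] = " + bestEdge[0]);
    }
}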
12 src/edu/ucla/sspace/graph/SimpleGraphIterator.java
@@ -33,7 +33,7 @@
import java.util.Queue;
import java.util.Set;
-import edu.ucla.sspace.util.Pair;
+import edu.ucla.sspace.util.primitive.IntPair;
/**
@@ -76,7 +76,7 @@
* @throws NullPointerException if {@code g} is {@code null}
*/
public SimpleGraphIterator(Multigraph<T,E> g, int subgraphSize) {
- subgraphIterator = //null;
+ subgraphIterator =
new SubgraphIterator<E,Multigraph<T,E>>(g, subgraphSize);
next = new ArrayDeque<Multigraph<T,E>>();
advance();
@@ -94,7 +94,7 @@ private void advance() {
// Find all pairs of edges, checking whether the graph is a simple
// graph already
boolean isSimpleAlready = true;
- List<Pair<Integer>> connected = new ArrayList<Pair<Integer>>();
+ List<IntPair> connected = new ArrayList<IntPair>();
for (int i : g.vertices()) {
for (int j : g.vertices()) {
if (i == j)
@@ -108,7 +108,7 @@ private void advance() {
}
int size = edges.size();
if (size > 0)
- connected.add(new Pair<Integer>(i, j));
+ connected.add(new IntPair(i, j));
if (size > 1)
isSimpleAlready = false;
}
@@ -144,11 +144,11 @@ private void advance() {
* be added. This graph will be copied prior to modifying it.
*/
private Collection<Multigraph<T,E>> enumerateSimpleGraphs(
- Multigraph<T,E> input, List<Pair<Integer>> connected,
+ Multigraph<T,E> input, List<IntPair> connected,
int curPair, Multigraph<T,E> toCopy) {
List<Multigraph<T,E>> simpleGraphs = new LinkedList<Multigraph<T,E>>();
- Pair<Integer> p = connected.get(curPair);
+ IntPair p = connected.get(curPair);
// Get the set of edges between the current vertex pair
Set<E> edges = input.getEdges(p.x, p.y);
// Pick one of the edges and generate a graph from the remaining pairs
16 src/edu/ucla/sspace/graph/SparseDirectedTypedEdgeSet.java
@@ -439,20 +439,12 @@ public boolean isEmpty() {
throw new Error();
}
- public Set<Integer> predecessors() {
- return TDecorators.wrap(inEdges.keySet());
+ public IntSet predecessors() {
+ return TroveIntSet.wrap(inEdges.keySet());
}
- TIntSet predecessorsPrimitive() {
- return inEdges.keySet();
- }
-
- public Set<Integer> successors() {
- return TDecorators.wrap(outEdges.keySet());
- }
-
- TIntSet successorsPrimitive() {
- return outEdges.keySet();
+ public IntSet successors() {
+ return TroveIntSet.wrap(outEdges.keySet());
}
/**
737 src/edu/ucla/sspace/graph/SparseTypedEdgeSet.java
@@ -21,22 +21,43 @@
package edu.ucla.sspace.graph;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
import java.util.AbstractSet;
+import java.util.ArrayList;
+import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
-import edu.ucla.sspace.util.CombinedSet;
+import edu.ucla.sspace.util.HashMultiMap;
+import edu.ucla.sspace.util.MultiMap;
+import edu.ucla.sspace.util.primitive.CompactIntSet;
import edu.ucla.sspace.util.primitive.IntIterator;
import edu.ucla.sspace.util.primitive.IntSet;
import edu.ucla.sspace.util.primitive.TroveIntSet;
+import gnu.trove.TDecorators;
+import gnu.trove.map.TIntIntMap;
+import gnu.trove.map.TObjectIntMap;
+import gnu.trove.set.TIntSet;
+import gnu.trove.set.hash.TIntHashSet;
+import gnu.trove.map.hash.TIntObjectHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.iterator.TIntIterator;
+import gnu.trove.iterator.TIntObjectIterator;
+import gnu.trove.procedure.TIntObjectProcedure;
+
/**
* An {@link EdgeSet} implementation that stores {@link TypedEdge} instances for
@@ -44,18 +65,88 @@
* interface for interacting with edges on the basis of their type.
*/
public class SparseTypedEdgeSet<T> extends AbstractSet<TypedEdge<T>>
- implements EdgeSet<TypedEdge<T>> {
+ implements EdgeSet<TypedEdge<T>>, java.io.Serializable {
+ private static final long serialVersionUID = 1L;
+
+ /////
+ //
+ // IMPLEMENTATION NOTE: This class stores a set of types associated each
+ // each in coming and outgoing edge's vertex. Rather than storing the set
+ // of types as a Set<T>, the set is represented in a compact form using a
+ // BitSet, where each bit corresponds to a type index. Given the potential
+ // for a huge number of edge sets in any give graph, having each set
+ // maintain its own type-to-bit-index mapping wastes a significant amount of
+ // space -- especially if the sets are all using the same types. Therefore,
+ // we use a class-level cache of mapping the types to indices with two
+ // global static variables. This results in a significant space savings.
+ // However, because these are static variables, their mapping state needs to
+ // be preserved upon serialization, which leads to a (rather complex) custom
+ // serialization code.
+ //
+ ////
+
+ /**
+ * A mapping from indices to their corresponding types
+ */
+ private static final List<Object> TYPES = new ArrayList<Object>();
+
+ /**
+ * The mapping from types to their indices
+ */
+ private static final Map<Object,Integer> TYPE_INDICES =
+ new HashMap<Object,Integer>();
+
+ /**
+ * Returns the index for the given type, creating a new index if necessary
+ */
+ private static int index(Object o) {
+ Integer i = TYPE_INDICES.get(o);
+ if (i == null) {
+ synchronized (TYPE_INDICES) {
+ // check that another thread did not already update the index
+ i = TYPE_INDICES.get(o);
+ if (i != null)
+ return i;
+ else {
+ int j = TYPE_INDICES.size();
+ TYPE_INDICES.put(o, j);
+ TYPES.add(o);
+ return j;
+ }
+ }
+ }
+ return i;
+ }
+
/**
* The vertex to which all edges in the set are connected
*/
private final int rootVertex;
-
- private final Map<T,IntSet> typeToEdges;
+
+ /**
+ * A mapping from a type to the set of outgoing edges
+ */
+ private final TIntObjectHashMap<BitSet> edges;
+ /**
+ * The number of edges in this set.
+ */
+ private int size;
+
+ /**
+ * The types that are contained in this set;
+ */
+ private BitSet setTypes;
+
+ /**
+ * Creates a new {@code SparseTypedEdgeSet} for the specfied vertex.
+ */
public SparseTypedEdgeSet(int rootVertex) {
this.rootVertex = rootVertex;
- typeToEdges = new HashMap<T,IntSet>();
+ edges = new TIntObjectHashMap<BitSet>();
+ setTypes = new BitSet();
+ size = 0;
}
/**
@@ -63,36 +154,69 @@ public SparseTypedEdgeSet(int rootVertex) {
* if the non-root vertex has a greater index that this vertex.
*/
public boolean add(TypedEdge<T> e) {
- if (e.from() == rootVertex) {
- IntSet edges = getEdgesForType(e.edgeType());
- return edges.add(e.to());
+ if (e.from() == rootVertex)
+ return add(edges, e.to(), e.edgeType());
+ else if (e.to() == rootVertex)
+ return add(edges, e.from(), e.edgeType());
+ return false;
+ }
+
+ /**
+ * Adds an edge to the spectied set that connectes t{@code i} according to
+ * the given type, or returns {@code false} if the edge already existed.
+ */
+ private boolean add(TIntObjectHashMap<BitSet> edges, int i, T type) {
+ BitSet types = edges.get(i);
+ // If there weren't any edges to this vertex, then special case the
+ // creation and return true.
+ if (types == null) {
+ types = new BitSet();
+ edges.put(i, types);
+ types.set(index(type));
+ size++;
+ return true;
}
- else if (e.to() == rootVertex) {
- IntSet edges = getEdgesForType(e.edgeType());
- return edges.add(e.from());
+ // Otherwise, lookup the type's index and see if it already exists in
+ // the bitset, indicating the edge does too
+ int index = index(type);
+ setTypes.set(index);
+ if (!types.get(index)) {
+ types.set(index);
+ size++;
+ return true;
}
+ // If the type was already there, then return false because the edge
+ // already exists
return false;
}
/**
* {@inheritDoc}
*/
+ public void clear() {
+ edges.clear();
+ }
+
+ /**
+ * {@inheritDoc} The set of vertices returned by this set is immutable.
+ */
public IntSet connected() {
- IntSet connected = new TroveIntSet();
- for (IntSet s : typeToEdges.values())
- connected.addAll(s);
- return connected;
+ return TroveIntSet.wrap(edges.keySet());
}
/**
* {@inheritDoc}
*/
public boolean connects(int vertex) {
- for (IntSet edges : typeToEdges.values()) {
- if (edges.contains(vertex))
- return true;
- }
- return false;
+ return edges.containsKey(vertex);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public boolean connects(int vertex, T type) {
+ BitSet types = edges.get(vertex);
+ return types != null && types.get(index(type));
}
/**
@@ -101,79 +225,117 @@ public boolean connects(int vertex) {
public boolean contains(Object o) {
if (!(o instanceof TypedEdge))
return false;
-
@SuppressWarnings("unchecked")
TypedEdge<T> e = (TypedEdge<T>)o;
-
- if (e.from() == rootVertex) {
- IntSet edges = typeToEdges.get(e.edgeType());
- return edges != null && edges.contains(e.to());
- }
- else if (e.to() == rootVertex) {
- IntSet edges = typeToEdges.get(e.edgeType());
- return edges != null && edges.contains(e.from());
- }
+
+ if (e.from() == rootVertex)
+ return contains(edges, e.to(), e.edgeType());
+ else if (e.to() == rootVertex)
+ return contains(edges, e.from(), e.edgeType());
return false;
}
+ private boolean contains(TIntObjectHashMap<BitSet> edges, int i, T type) {
+ BitSet types = edges.get(i);
+ if (types == null)
+ return false;
+ int index = index(type);
+ return types.get(index);
+ }
+
/**
* {@inheritDoc}
*/
- public SparseTypedEdgeSet copy(IntSet vertices) {
- throw new Error();
-// SparseTypedEdgeSet copy = new SparseTypedEdgeSet(rootVertex);
-// if (edges.size() < vertices.size()) {
-// TIntIterator iter = edges.iterator();
-// while (iter.hasNext()) {
-// int v = iter.next();
-// if (vertices.contains(v))
-// copy.edges.add(v);
-// }
-// }
-// else {
-// IntIterator iter = vertices.iterator();
-// while (iter.hasNext()) {
-// int v = iter.nextInt();
-// if (edges.contains(v))
-// copy.edges.add(v);
-// }
-// }
-// return copy;
+ public SparseTypedEdgeSet<T> copy(IntSet vertices) {
+ SparseTypedEdgeSet<T> copy = new SparseTypedEdgeSet<T>(rootVertex);
+
+ if (vertices.size() < edges.size()) {
+ IntIterator iter = vertices.iterator();
+ while (iter.hasNext()) {
+ int v = iter.nextInt();
+ if (edges.containsKey(v)) {
+ BitSet b = edges.get(v);
+ BitSet b2 = new BitSet();
+ b2.or(b);
+ copy.edges.put(v, b2);
+ }
+ }
+ }
+ else {
+ TIntObjectIterator<BitSet> iter = edges.iterator();
+ while (iter.hasNext()) {
+ iter.advance();
+ int v = iter.key();
+ if (vertices.contains(v)) {
+ BitSet b = iter.value();
+ BitSet b2 = new BitSet();
+ b2.or(b);
+ copy.edges.put(v, b2);
+ }
+ }
+ }
+ return copy;
+ }
+
+ /**
+ * Removes all edges to {@code v}.
+ */
+ public boolean disconnect(int v) {
+ BitSet b = edges.remove(v);
+ if (b != null) {
+ size -= b.cardinality();
+ return true;
+ }
+ return false;
}
/**
* {@inheritDoc}
*/
- public boolean disconnect(int vertex) {
- throw new Error();
+ public Set<TypedEdge<T>> getEdges(final T type) {
+ if (!TYPE_INDICES.containsKey(type))
+ return Collections.<TypedEdge<T>>emptySet();
+ final int typeIndex = index(type);
+ final Set<TypedEdge<T>> s = new HashSet<TypedEdge<T>>();
+ edges.forEachEntry(new TIntObjectProcedure<BitSet>() {
+ public boolean execute(int v, BitSet types) {
+ if (types.get(typeIndex))
+ s.add(new SimpleTypedEdge<T>(
+ type, v, rootVertex));
+ return true;
+ }
+ });
+ return s;
}
/**
* {@inheritDoc}
*/
public Set<TypedEdge<T>> getEdges(int vertex) {
- Set<TypedEdge<T>> output = new HashSet<TypedEdge<T>>();
- for (Map.Entry<T,IntSet> e : typeToEdges.entrySet()) {
- IntSet edges = e.getValue();
- if (edges.contains(vertex)) {
- output.add(new SimpleTypedEdge<T>(
- e.getKey(), vertex, rootVertex));
- }
+ BitSet b = edges.get(vertex);
+ if (b == null)
+ return Collections.<TypedEdge<T>>emptySet();
+ Set<TypedEdge<T>> s = new HashSet<TypedEdge<T>>();
+ for (int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i+1)) {
+ @SuppressWarnings("unchecked")
+ T type = (T)(TYPES.get(i));
+ s.add(new SimpleTypedEdge<T>(type, vertex, rootVertex));
}
- return output;
+ return s;
}
/**
- * Returns the set of edges that have the specified type.
+ * {@inheritDoc}
*/
- private IntSet getEdgesForType(T type) {
- IntSet edges = typeToEdges.get(type);
- if (edges == null) {
- edges = new TroveIntSet();
- typeToEdges.put(type, edges);
- }
- return edges;
- }
+ public Set<TypedEdge<T>> getEdges(int vertex, Set<T> types) {
+ // NOTE: this is purely unoptimized code, so fix if it ever gets in a
+ // hotspot
+ Set<TypedEdge<T>> set = new HashSet<TypedEdge<T>>();
+ for (TypedEdge<T> e : new EdgesForVertex(vertex))
+ if (types.contains(e.edgeType()))
+ set.add(e);
+ return set;
+ }
/**
* {@inheritDoc}
@@ -184,9 +346,16 @@ public int getRoot() {
/**
* {@inheritDoc}
+ */
+ public boolean isEmpty() {
+ return edges.isEmpty();
+ }
+
+ /**
+ * {@inheritDoc}
*/
public Iterator<TypedEdge<T>> iterator() {
- return new TypedEdgeIterator();
+ return new EdgeIterator();
}
/**
@@ -198,14 +367,29 @@ public boolean remove(Object o) {
@SuppressWarnings("unchecked")
TypedEdge<T> e = (TypedEdge<T>)o;
-
- if (e.from() == rootVertex) {
- IntSet edges = typeToEdges.get(e.edgeType());
- return edges != null && edges.remove(e.to());
- }
- else if (e.to() == rootVertex) {
- IntSet edges = typeToEdges.get(e.edgeType());
- return edges != null && edges.remove(e.from());
+
+ if (e.from() == rootVertex)
+ return remove(edges, e.to(), e.edgeType());
+ else if (e.to() == rootVertex)
+ return remove(edges, e.from(), e.edgeType());
+ return false;
+ }
+
+ private boolean remove(TIntObjectHashMap<BitSet> edges, int i, T type) {
+ BitSet types = edges.get(i);
+ if (types == null)
+ return false;
+ int index = index(type);
+ // If there was an edge of that type, remove it and update the
+ // "connected" set as necessary
+ if (types.get(index)) {
+ types.set(index, false);
+ // If this was the last edge to that vertex, remove this BitMap
+ if (types.cardinality() == 0) {
+ edges.remove(i);
+ size--;
+ }
+ return true;
}
return false;
}
@@ -214,21 +398,223 @@ else if (e.to() == rootVertex) {
* {@inheritDoc}
*/
public int size() {
- int sz = 0;
- for (IntSet edges : typeToEdges.values())
- sz += edges.size();
- return sz;
+ return size;
}
- public String toString() {
- StringBuilder sb = new StringBuilder(typeToEdges.size() * 16);
- sb.append("{ from: " ).append(rootVertex).append(' ');
- for (Map.Entry<T,IntSet> e : typeToEdges.entrySet()) {
- sb.append("{type: ").append(e.getKey()).
- append(" to: ").append(e.getValue()).append('}');
+ /**
+ * Returns the set of edge types present in this edge set.
+ */
+ public Set<T> types() {
+ return new Types();
+ }
+
+ /**
+ * Returns an iterator over the edges in this set that reports each edge
+ * from only one of its two endpoints (the one with the larger index), so
+ * that iterating the unique edges of every vertex's set yields each edge
+ * exactly once.
+ */
+ public Iterator<TypedEdge<T>> uniqueIterator() {
+ return new UniqueEdgeIterator();
+ }
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ out.defaultWriteObject();
+ // The TYPE_INDICES mapping is no longer valid upon deserialization, so
+ // we need to write it as a part of this object's state. Java's
+ // serialization caches repeated object references, so if multiple
+ // instances of this class are written to the same stream, the mapping is
+ // only stored once, which saves significant space.
+ out.writeObject(TYPE_INDICES);
+ }
+
+ private void readObject(ObjectInputStream in)
+ throws IOException, ClassNotFoundException {
+ // Restore the existing state of the Set
+ in.defaultReadObject();
+
+ // Then read in the type indices, which may or may not need to be
+ // restored depending on the current state of the cache
+ @SuppressWarnings("unchecked")
+ Map<Object,Integer> typeIndices =
+ (Map<Object,Integer>)Map.class.cast(in.readObject());
+ boolean needToRemapIndices = true;
+ if (!TYPE_INDICES.equals(typeIndices)) {
+ if (TYPE_INDICES.isEmpty()) {
+ synchronized (TYPE_INDICES) {
+ // Check whether some thread might have modified the map in
+ // the mean-time. If not, then use our type mapping as the
+ // default
+ if (TYPE_INDICES.isEmpty()) {
+ TYPE_INDICES.putAll(typeIndices);
+ // Fill in the TYPES list with nulls first so that we
+ // can iterate through the typeIndices map once without
+ // having to worry about the indexing
+ for (int i = 0; i < TYPE_INDICES.size(); ++i)
+ TYPES.add(null);
+ for (Map.Entry<Object,Integer> e :
+ TYPE_INDICES.entrySet()) {
+ TYPES.set(e.getValue(), e.getKey());
+ }
+ needToRemapIndices = false;
+ }
+
+ }
+ }
+ }
+ // Check if the indices we have are a subset or superset of the current
+ // type indices
+ else {
+ boolean foundMismatch = false;
+ for (Map.Entry<Object,Integer> e : typeIndices.entrySet()) {
+ Object o = e.getKey();
+ int oldIndex = e.getValue();
+ Integer curIndex = TYPE_INDICES.get(o);
+ // If the type is not in the current mapping, add it using this
+ // set's old index, which is possibly beyond the range of the
+ // current set of types. Note that our type mapping isn't
+ // invalidated by this action, so we don't need to remap.
+ if (curIndex == null) {
+ // Grow the TYPES list until there is room for this
+ // additional index
+ while (TYPES.size() <= oldIndex)
+ TYPES.add(null);
+ TYPES.set(oldIndex, o);
+ TYPE_INDICES.put(o, oldIndex);
+ }
+ else if (curIndex != oldIndex) {
+ foundMismatch = true;
+ }
+ }
+ // If we were able to add our indices without disturbing the
+ // existing mapping, or if our indices were just a subset of the
+ // existing ones, then we don't need to remap the total set of
+ // indices.
+ if (!foundMismatch)
+ needToRemapIndices = false;
+ }
+
+ // If this set's type indices are inconsistent with the current type
+ // mapping, update the mapping with any missing types and then
+ // rewrite all of the BitSet contents with the correct indices
+ if (needToRemapIndices) {
+ TIntIntMap typeRemapping = new TIntIntHashMap();
+ for (Map.Entry<Object,Integer> e : typeIndices.entrySet()) {
+ Object o = e.getKey();
+ int oldIndex = e.getValue();
+ // NOTE: the branch above may have already added some of our
+ // types with consistent indices, so this may be an identity
+ // mapping for those types
+ typeRemapping.put(oldIndex, index(o));
+ }
+ // Remap the edge types for each connected vertex's BitSet
+ for (TIntObjectIterator<BitSet> it = edges.iterator(); it.hasNext(); ) {
+ it.advance();
+ int v = it.key();
+ BitSet oldIndices = it.value();
+ BitSet newIndices = new BitSet();
+ for (int i = oldIndices.nextSetBit(0); i >= 0;
+ i = oldIndices.nextSetBit(i+1)) {
+ newIndices.set(typeRemapping.get(i));
+ }
+ it.setValue(newIndices);
+ }
+ }
+ }
+
+
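+ /**
+ * A read-only {@link Set} view of the edge types present in this edge
+ * set, backed by the {@code setTypes} bit set.
+ */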
+ private class Types extends AbstractSet<T> {
+
+ public boolean contains(Object o) {
+ if (TYPE_INDICES.containsKey(o)) {
+ Integer i = TYPE_INDICES.get(o);
+ return setTypes.get(i);
+ }
+ return false;
+ }
+
+ public Iterator<T> iterator() {
+ return new TypeIter();
+ }
+
+ public int size() {
+ return setTypes.cardinality();
+ }
+
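+ /**
+ * An iterator over the edge types in this set, based on the indices of
+ * the bits set in {@code setTypes}.
+ */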
+ private class TypeIter implements Iterator<T> {
+
+ IntIterator typeIndices;
+
+ public TypeIter() {
+ typeIndices = CompactIntSet.wrap(setTypes).iterator();
+ }
+
+ public boolean hasNext() {
+ return typeIndices.hasNext();
+ }
+
+ public T next() {
+ if (!typeIndices.hasNext())
+ throw new NoSuchElementException();
+ int i = typeIndices.nextInt();
+ @SuppressWarnings("unchecked")
+ T type = (T)(TYPES.get(i));
+ return type;
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+ }
+
+ /**
+ * A wrapper around the set of edges that connect another vertex to the root
+ * vertex
+ */
+ private class EdgesForVertex extends AbstractSet<TypedEdge<T>> {
+
+ /**
+ * The vertex in the edges that is not this root vertex
+ */
+ private final int otherVertex;
+
+ public EdgesForVertex(int otherVertex) {
+ this.otherVertex = otherVertex;
+ }
+
+ @Override public boolean add(TypedEdge<T> e) {
+ return ((e.to() == rootVertex && e.from() == otherVertex)
+ || (e.from() == rootVertex && e.to() == otherVertex))
+ && SparseTypedEdgeSet.this.add(e);
+ }
+
+ @Override public boolean contains(Object o) {
+ if (!(o instanceof TypedEdge))
+ return false;
+ TypedEdge<?> e = (TypedEdge)o;
+ return ((e.to() == rootVertex && e.from() == otherVertex)
+ || (e.from() == rootVertex && e.to() == otherVertex))
+ && SparseTypedEdgeSet.this.contains(e);
+ }
+
+ @Override public boolean isEmpty() {
+ return !SparseTypedEdgeSet.this.connects(otherVertex);
+ }
+
+ @Override public Iterator<TypedEdge<T>> iterator() {
+ return new EdgesForVertexIterator(otherVertex);
+ }
+
+ @Override public boolean remove(Object o) {
+ if (!(o instanceof TypedEdge))
+ return false;
+ TypedEdge<?> e = (TypedEdge)o;
+ return ((e.to() == rootVertex && e.from() == otherVertex)
+ || (e.from() == rootVertex && e.to() == otherVertex))
+ && SparseTypedEdgeSet.this.remove(e);
+ }
+
+ @Override public int size() {
+ BitSet b = edges.get(otherVertex);
+ return (b == null) ? 0 : b.cardinality();
}
- sb.append('}');
- return sb.toString();
}
/**
@@ -236,49 +622,176 @@ public String toString() {
* TypedEdge} instances as it traverses through the set of connected
* vertices.
*/
- private class TypedEdgeIterator implements Iterator<TypedEdge<T>> {
-
- private Iterator<Map.Entry<T,IntSet>> edgeIter;
-
- private Iterator<Integer> curIter;
+ private class EdgesForVertexIterator implements Iterator<TypedEdge<T>> {
- private T curType;
+ private int curTypeIndex;
- private Iterator<Integer> lastRemovedFrom;
+ private BitSet curTypes;
+ /**
+ * The next edge to return. This field is updated by {@link #advance()}
+ */
private TypedEdge<T> next;
- public TypedEdgeIterator() {
- edgeIter = typeToEdges.entrySet().iterator();
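+ /**
+ * The vertex at the other end of the edges being iterated
+ */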
+ int otherVertex;
+
+ public EdgesForVertexIterator(int otherVertex) {
+ this.otherVertex = otherVertex;
+ curTypeIndex = -1;
+ curTypes = edges.get(otherVertex);
advance();
}
private void advance() {
next = null;
- if ((curIter == null || !curIter.hasNext()) && edgeIter.hasNext()) {
- Map.Entry<T,IntSet> e = edgeIter.next();
- curIter = e.getValue().iterator();
- curType = e.getKey();
+ while (next == null && curTypes != null) {
+ curTypeIndex = curTypes.nextSetBit(curTypeIndex + 1);
+ if (curTypeIndex >= 0) {
+ // We know that the TYPES list holds objects of type T
+ @SuppressWarnings("unchecked")
+ T type = (T)(TYPES.get(curTypeIndex));
+ next = new SimpleTypedEdge<T>(type, otherVertex, rootVertex);
+ }
+ // If there were no further types for this vertex, clear curTypes
+ // so that the loop terminates
+ else
+ curTypes = null;
}
}
public boolean hasNext() {
- return curIter.hasNext();
+ return next != null;
}
public TypedEdge<T> next() {
- if (!hasNext())
+ if (next == null)
throw new NoSuchElementException();
- TypedEdge<T> cur =
- new SimpleTypedEdge<T>(curType, rootVertex, curIter.next());
- // Update the iterator on which remove() will be called
- if (lastRemovedFrom != curIter)
- lastRemovedFrom = curIter;
- return cur;
+ TypedEdge<T> n = next;
+ advance();
+ return n;
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+
+ /**
+ * An iterator over the edges in this set that constructs {@link
+ * TypedEdge} instances as it traverses through the set of connected
+ * vertices.
+ */
+ private class EdgeIterator implements Iterator<TypedEdge<T>> {
+
+ /**
+ * An iterator over the mapping from connected vertices to the type indices of their edges
+ */
+ private TIntObjectIterator<BitSet> iter;
+
+ /**
+ * The next edge to return. This field is updated by {@link #advance()}
+ */
+ private TypedEdge<T> next;
+
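+ /**
+ * The vertex whose edges are currently being returned
+ */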
+ private int curVertex;
+
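+ /**
+ * An iterator over the type indices of the edges that connect
+ * {@code curVertex} to the root vertex
+ */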
+ private IntIterator curVertexTypes;
+
+
+ public EdgeIterator() {
+ this.iter = edges.iterator();
+ advance();
+ }
+
+ private void advance() {
+ next = null;
+ while (next == null) {
+ // Check whether the current vertex has types left, and if not,
+ // load a new vertex's types
+ if (curVertexTypes == null || !curVertexTypes.hasNext()) {
+ // If there were no more types to load, stop searching
+ if (!iter.hasNext())
+ break;
+ iter.advance();
+ curVertex = iter.key();
+ curVertexTypes = CompactIntSet.wrap(iter.value()).iterator();
+ }
+
+ if (curVertexTypes.hasNext()) {
+ int typeIndex = curVertexTypes.nextInt();
+ @SuppressWarnings("unchecked")
+ T type = (T)(TYPES.get(typeIndex));
+ next = new SimpleTypedEdge<T>(type, curVertex, rootVertex);
+ }
+ }
+ }
+
+ public boolean hasNext() {
+ return next != null;
+ }
+
+ public TypedEdge<T> next() {
+ if (next == null)
+ throw new NoSuchElementException();
+ TypedEdge<T> n = next;
+ advance();
+ return n;
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /**
+ * An iterator over the edges in this set that returns each edge exactly
+ * once, reporting an edge only when the root vertex is the larger of its
+ * two endpoints.
+ */
+ private class UniqueEdgeIterator implements Iterator<TypedEdge<T>> {
+
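+ /**
+ * An iterator over all of the edges in this set
+ */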
+ Iterator<TypedEdge<T>> it;
+
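+ /**
+ * The next unique edge to return, or {@code null} if none remain
+ */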
+ TypedEdge<T> next;
+
+ public UniqueEdgeIterator() {
+ it = iterator();
+ advance();
+ }
+
+ private void advance() {
+ next = null;
+ while (it.hasNext() && next == null) {
+ TypedEdge<T> e = it.next();
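+ // Report the edge only if the root vertex is the larger of its two
+ // endpoints; the smaller endpoint's set skips it, so each edge is
+ // returned by exactly one of its two vertices' sets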
+ if ((e.from() == rootVertex && e.to() < rootVertex)
+ || (e.to() == rootVertex && e.from() < rootVertex))
+ next = e;
+ }
+ }
+
+ public boolean hasNext() {
+ return next != null;
+ }
+
+ public TypedEdge<T> next() {
+ if (next == null)
+ throw new NoSuchElementException();
+ TypedEdge<T> n = next;
+ advance();
+ return n;
}
public void remove() {
- lastRemovedFrom.remove();
+ throw new UnsupportedOperationException();
}
}