-
Notifications
You must be signed in to change notification settings - Fork 88
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[#691] Implement Connected Components via Gelly
* Implement Connected Components via Gelly. * fixes #691
- Loading branch information
Showing
11 changed files
with
400 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
...gradoop/flink/algorithms/gelly/connectedcomponents/AnnotateWeaklyConnectedComponents.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
/** | ||
* Copyright © 2014 - 2017 Leipzig University (Database Research Group) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.gradoop.flink.algorithms.gelly.connectedcomponents; | ||
|
||
import org.apache.flink.api.java.DataSet; | ||
import org.apache.flink.graph.Graph; | ||
import org.apache.flink.types.NullValue; | ||
import org.gradoop.common.model.impl.id.GradoopId; | ||
import org.gradoop.common.model.impl.pojo.Vertex; | ||
import org.gradoop.flink.algorithms.gelly.GellyAlgorithm; | ||
import org.gradoop.flink.algorithms.gelly.connectedcomponents.functions.ConnectedComponentToAttribute; | ||
import org.gradoop.flink.algorithms.gelly.functions.EdgeToGellyEdgeWithNullValue; | ||
import org.gradoop.flink.algorithms.gelly.functions.VertexToGellyVertexWithGradoopId; | ||
import org.gradoop.flink.model.api.epgm.LogicalGraph; | ||
import org.gradoop.flink.model.impl.functions.epgm.Id; | ||
|
||
/** | ||
* A gradoop operator wrapping {@link org.apache.flink.graph.library.ConnectedComponents}. | ||
* The result will be the same {@link LogicalGraph} with a component id assigned to each vertex | ||
* as a Property. | ||
*/ | ||
public class AnnotateWeaklyConnectedComponents extends GellyAlgorithm<GradoopId, NullValue> { | ||
|
||
/** | ||
* Property key to store the component id in. | ||
*/ | ||
private final String propertyKey; | ||
|
||
/** | ||
* Maximum number of iterations. | ||
*/ | ||
private final int maxIterations; | ||
|
||
/** | ||
* Constructor for connected components with a maximum number of iterations. | ||
* | ||
* @param propertyKey Property key to store the component id in. | ||
* @param maxIterations The maximum number of iterations. | ||
*/ | ||
public AnnotateWeaklyConnectedComponents(String propertyKey, int maxIterations) { | ||
super(new VertexToGellyVertexWithGradoopId(), new EdgeToGellyEdgeWithNullValue()); | ||
this.propertyKey = propertyKey; | ||
this.maxIterations = maxIterations; | ||
} | ||
|
||
@Override | ||
protected LogicalGraph executeInGelly(Graph<GradoopId, GradoopId, NullValue> graph) | ||
throws Exception { | ||
DataSet<Vertex> newVertices = new org.apache.flink.graph.library.ConnectedComponents<GradoopId, | ||
GradoopId, NullValue>(maxIterations) | ||
.run(graph) | ||
.join(currentGraph.getVertices()) | ||
.where(0) | ||
.equalTo(new Id<>()) | ||
.with(new ConnectedComponentToAttribute(propertyKey)); | ||
return currentGraph.getConfig().getLogicalGraphFactory().fromDataSets(newVertices, | ||
currentGraph.getEdges()); | ||
} | ||
|
||
@Override | ||
public String getName() { | ||
return getClass().getName(); | ||
} | ||
} |
83 changes: 83 additions & 0 deletions
83
...ava/org/gradoop/flink/algorithms/gelly/connectedcomponents/WeaklyConnectedComponents.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/** | ||
* Copyright © 2014 - 2017 Leipzig University (Database Research Group) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.gradoop.flink.algorithms.gelly.connectedcomponents; | ||
|
||
import org.apache.flink.api.java.DataSet; | ||
import org.gradoop.common.model.impl.pojo.Vertex; | ||
import org.gradoop.flink.model.api.epgm.GraphCollection; | ||
import org.gradoop.flink.model.api.epgm.LogicalGraph; | ||
import org.gradoop.flink.model.api.operators.UnaryGraphToCollectionOperator; | ||
import org.gradoop.flink.model.impl.functions.epgm.PropertyRemover; | ||
|
||
/** | ||
* Split a {@link LogicalGraph} into a {@link GraphCollection} of its weakly connected components. | ||
*/ | ||
public class WeaklyConnectedComponents implements UnaryGraphToCollectionOperator { | ||
|
||
/** | ||
* Default property key to temporarily store the component id. | ||
*/ | ||
private static final String DEFAULT_PROPERTY_KEY = "_wcc_component_id"; | ||
|
||
/** | ||
* Maximum number of iterations for; | ||
*/ | ||
private final int maxIterations; | ||
|
||
/** | ||
* Property key to temporarily store the component id. | ||
*/ | ||
private final String propertyKey; | ||
|
||
/** | ||
* Initialize the operator using the default property key. | ||
* | ||
* @param maxIterations Maximum number of iterations for | ||
* {@link AnnotateWeaklyConnectedComponents}. | ||
*/ | ||
public WeaklyConnectedComponents(int maxIterations) { | ||
this(DEFAULT_PROPERTY_KEY, maxIterations); | ||
} | ||
|
||
/** | ||
* Initialize the operator. | ||
* | ||
* @param propertyKey Property key to temporarily store the component id. | ||
* @param maxIterations Maximum number of iteration for | ||
* {@link AnnotateWeaklyConnectedComponents}. | ||
*/ | ||
public WeaklyConnectedComponents(String propertyKey, int maxIterations) { | ||
this.maxIterations = maxIterations; | ||
this.propertyKey = propertyKey; | ||
} | ||
|
||
|
||
@Override | ||
public GraphCollection execute(LogicalGraph graph) { | ||
LogicalGraph withWccAnnotations = graph | ||
.callForGraph(new AnnotateWeaklyConnectedComponents(propertyKey, maxIterations)); | ||
GraphCollection split = withWccAnnotations.splitBy(propertyKey); | ||
DataSet<Vertex> vertices = split.getVertices() | ||
.map(new PropertyRemover<>(propertyKey)); | ||
return graph.getConfig().getGraphCollectionFactory().fromDataSets(split.getGraphHeads(), | ||
vertices, split.getEdges()); | ||
} | ||
|
||
@Override | ||
public String getName() { | ||
return WeaklyConnectedComponents.class.getName(); | ||
} | ||
} |
49 changes: 49 additions & 0 deletions
49
...p/flink/algorithms/gelly/connectedcomponents/functions/ConnectedComponentToAttribute.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/** | ||
* Copyright © 2014 - 2017 Leipzig University (Database Research Group) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.gradoop.flink.algorithms.gelly.connectedcomponents.functions; | ||
|
||
import org.apache.flink.api.common.functions.JoinFunction; | ||
import org.gradoop.common.model.impl.id.GradoopId; | ||
import org.gradoop.common.model.impl.pojo.Vertex; | ||
|
||
/** | ||
* Stores the component id (as a {@link GradoopId} of one of the components vertices) as a property | ||
* in the vertex. | ||
*/ | ||
public class ConnectedComponentToAttribute | ||
implements JoinFunction<org.apache.flink.graph.Vertex<GradoopId, GradoopId>, Vertex, Vertex> { | ||
|
||
/** | ||
* Property to store the component id in. | ||
*/ | ||
private final String componentProperty; | ||
|
||
/** | ||
* Stores the connected components result as a Property. | ||
* | ||
* @param componentProperty Property name. | ||
*/ | ||
public ConnectedComponentToAttribute(String componentProperty) { | ||
this.componentProperty = componentProperty; | ||
} | ||
|
||
@Override | ||
public Vertex join(org.apache.flink.graph.Vertex<GradoopId, GradoopId> gellyVertex, | ||
Vertex gradoopVertex) { | ||
gradoopVertex.setProperty(componentProperty, gellyVertex.getValue()); | ||
return gradoopVertex; | ||
} | ||
} |
19 changes: 19 additions & 0 deletions
19
...n/java/org/gradoop/flink/algorithms/gelly/connectedcomponents/functions/package-info.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
/** | ||
* Copyright © 2014 - 2017 Leipzig University (Database Research Group) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
/** | ||
* Contains functions related to the Connected Components algorithm. | ||
*/ | ||
package org.gradoop.flink.algorithms.gelly.connectedcomponents.functions; |
19 changes: 19 additions & 0 deletions
19
...nk/src/main/java/org/gradoop/flink/algorithms/gelly/connectedcomponents/package-info.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
/** | ||
* Copyright © 2014 - 2017 Leipzig University (Database Research Group) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
/** | ||
* Contains classes related to the Connected Components algorithm. | ||
*/ | ||
package org.gradoop.flink.algorithms.gelly.connectedcomponents; |
46 changes: 46 additions & 0 deletions
46
...n/java/org/gradoop/flink/algorithms/gelly/functions/VertexToGellyVertexWithGradoopId.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/** | ||
* Copyright © 2014 - 2017 Leipzig University (Database Research Group) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.gradoop.flink.algorithms.gelly.functions; | ||
|
||
import org.apache.flink.api.java.functions.FunctionAnnotation; | ||
import org.gradoop.common.model.impl.id.GradoopId; | ||
import org.gradoop.common.model.impl.pojo.Vertex; | ||
|
||
/** | ||
* Maps EPGM vertex to a Gelly vertex with the {@link GradoopId} as its id and value. | ||
*/ | ||
@FunctionAnnotation.ForwardedFields("id->f0;id->f1") | ||
public class VertexToGellyVertexWithGradoopId implements VertexToGellyVertex<GradoopId> { | ||
/** | ||
* Reduce object instantiations | ||
*/ | ||
private final org.apache.flink.graph.Vertex<GradoopId, GradoopId> reuseVertex; | ||
|
||
/** | ||
* Constructor. | ||
*/ | ||
public VertexToGellyVertexWithGradoopId() { | ||
reuseVertex = new org.apache.flink.graph.Vertex<>(); | ||
} | ||
|
||
@Override | ||
public org.apache.flink.graph.Vertex<GradoopId, GradoopId> map(Vertex epgmVertex) { | ||
GradoopId id = epgmVertex.getId(); | ||
reuseVertex.setId(id); | ||
reuseVertex.setValue(id); | ||
return reuseVertex; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.