Skip to content

Commit

Permalink
[#533] Add IndexedGVELayout
Browse files Browse the repository at this point in the history
* provides access to label-partitioned data sets
* required for performance optimizations in Cypher engine
* fixes #533
  • Loading branch information
s1ck committed Aug 6, 2017
1 parent 5c2948d commit e39020c
Show file tree
Hide file tree
Showing 23 changed files with 658 additions and 139 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,21 @@ public class GraphCollection implements GraphCollectionOperators, GraphCollectio
// Data methods
//----------------------------------------------------------------------------

/**
 * {@inheritDoc}
 *
 * Delegates to the underlying layout of this collection.
 */
@Override
public boolean isGVELayout() {
return layout.isGVELayout();
}

/**
 * {@inheritDoc}
 *
 * Delegates to the underlying layout of this collection.
 */
@Override
public boolean isIndexedGVELayout() {
return layout.isIndexedGVELayout();
}

/**
 * {@inheritDoc}
 *
 * Delegates to the underlying layout of this collection.
 */
@Override
public boolean isTransactionalLayout() {
return layout.isTransactionalLayout();
}

@Override
public DataSet<Vertex> getVertices() {
return layout.getVertices();
Expand Down Expand Up @@ -137,16 +152,6 @@ public DataSet<Edge> getIncomingEdges(GradoopId vertexID) {
return layout.getIncomingEdges(vertexID);
}

@Override
public boolean hasGVELayout() {
return layout.hasGVELayout();
}

@Override
public boolean hasTransactionalLayout() {
return layout.hasTransactionalLayout();
}

/**
* {@inheritDoc}
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,13 @@ public class LogicalGraph implements LogicalGraphLayout, LogicalGraphOperators {
//----------------------------------------------------------------------------

@Override
public boolean hasGVELayout() {
return layout.hasGVELayout();
public boolean isGVELayout() {
return layout.isGVELayout();
}

/**
 * {@inheritDoc}
 *
 * Delegates to the underlying layout of this logical graph.
 */
@Override
public boolean isIndexedGVELayout() {
return layout.isIndexedGVELayout();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,21 @@ public interface GraphCollectionLayout extends Layout {
*
* @return true, iff layout based on three separate datasets.
*/
boolean hasGVELayout();
boolean isGVELayout();

/**
 * True, if the layout is based on datasets that are partitioned by graph, vertex and edge
 * label.
 *
 * @return true, iff layout is based on label-partitioned datasets
 */
boolean isIndexedGVELayout();

/**
* True, if the layout is based on a transactional data representation.
*
* @return true, iff layout based on a transactional representation
*/
boolean hasTransactionalLayout();
boolean isTransactionalLayout();
/**
* Returns the graph heads associated with the logical graphs in that
* collection.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.gradoop.flink.model.impl.layouts.transactional.tuples.GraphTransaction;

import java.util.Collection;
import java.util.Map;

/**
* Enables the construction of a {@link GraphCollectionLayout}.
Expand All @@ -31,39 +32,50 @@ public interface GraphCollectionLayoutFactory extends BaseLayoutFactory {
/**
* Creates a collection layout from the given datasets.
*
* @param graphHeads GraphHead DataSet
* @param vertices Vertex DataSet
* @return Graph collection
* @param graphHeads GraphHead DataSet
* @param vertices Vertex DataSet
* @return Graph collection layout
*/
GraphCollectionLayout fromDataSets(DataSet<GraphHead> graphHeads, DataSet<Vertex> vertices);

/**
* Creates a collection layout from the given datasets.
*
* @param graphHeads GraphHead DataSet
* @param vertices Vertex DataSet
* @param edges Edge DataSet
* @return Graph collection
* @param graphHeads GraphHead DataSet
* @param vertices Vertex DataSet
* @param edges Edge DataSet
* @return Graph collection layout
*/
GraphCollectionLayout fromDataSets(DataSet<GraphHead> graphHeads, DataSet<Vertex> vertices,
DataSet<Edge> edges);

/**
 * Creates a collection layout from the given datasets indexed by label.
 *
 * Label-indexed counterpart of
 * {@link #fromDataSets(DataSet, DataSet, DataSet)}: each argument maps a label to its dataset.
 *
 * @param graphHeads Mapping from label to graph head dataset
 * @param vertices Mapping from label to vertex dataset
 * @param edges Mapping from label to edge dataset
 * @return Graph collection layout
 */
GraphCollectionLayout fromIndexedDataSets(Map<String, DataSet<GraphHead>> graphHeads,
Map<String, DataSet<Vertex>> vertices, Map<String, DataSet<Edge>> edges);

/**
* Creates a collection layout from the given collections.
*
* @param graphHeads Graph Head collection
* @param vertices Vertex collection
* @param edges Edge collection
* @return Graph collection
* @param graphHeads Graph Head collection
* @param vertices Vertex collection
* @param edges Edge collection
* @return Graph collection layout
*/
GraphCollectionLayout fromCollections(Collection<GraphHead> graphHeads,
Collection<Vertex> vertices, Collection<Edge> edges);

/**
* Creates a graph collection layout from a given logical graph layout.
*
* @param logicalGraphLayout input graph
* @return 1-element graph collection
* @param logicalGraphLayout input graph
* @return graph collection layout
*/
GraphCollectionLayout fromGraphLayout(LogicalGraphLayout logicalGraphLayout);

Expand All @@ -72,8 +84,8 @@ GraphCollectionLayout fromCollections(Collection<GraphHead> graphHeads,
*
* Overlapping vertices and edge are merged by Id comparison only.
*
* @param transactions transaction dataset
* @return graph collection
* @param transactions transaction dataset
* @return graph collection layout
*/
GraphCollectionLayout fromTransactions(DataSet<GraphTransaction> transactions);

Expand All @@ -82,9 +94,9 @@ GraphCollectionLayout fromCollections(Collection<GraphHead> graphHeads,
*
* Overlapping vertices and edge are merged using provided reduce functions.
*
* @param transactions transaction dataset
* @param vertexMergeReducer vertex merge function
* @param edgeMergeReducer edge merge function
* @param transactions transaction dataset
* @param vertexMergeReducer vertex merge function
* @param edgeMergeReducer edge merge function
* @return graph collection layout
*/
GraphCollectionLayout fromTransactions(DataSet<GraphTransaction> transactions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,14 @@ public interface LogicalGraphLayout extends Layout {
*
* @return true, iff layout based on three separate datasets.
*/
boolean hasGVELayout();
boolean isGVELayout();

/**
 * True, if the layout is based on datasets that are partitioned by graph, vertex and edge
 * label.
 *
 * @return true, iff layout is based on label-partitioned datasets
 */
boolean isIndexedGVELayout();

/**
* Returns a dataset containing a single graph head associated with that
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.gradoop.common.model.impl.pojo.Vertex;

import java.util.Collection;
import java.util.Map;

/**
* Enables the construction of a {@link LogicalGraphLayout}.
Expand Down Expand Up @@ -60,6 +61,20 @@ public interface LogicalGraphLayoutFactory extends BaseLayoutFactory {
LogicalGraphLayout fromDataSets(DataSet<GraphHead> graphHead, DataSet<Vertex> vertices,
DataSet<Edge> edges);

/**
 * Creates a logical graph layout from the given datasets indexed by label.
 *
 * The method assumes that the given vertices and edges are already assigned
 * to the given graph head.
 *
 * @param graphHeads Mapping from label to graph head dataset
 * @param vertices Mapping from label to vertex dataset
 * @param edges Mapping from label to edge dataset
 * @return Logical graph layout
 */
LogicalGraphLayout fromIndexedDataSets(Map<String, DataSet<GraphHead>> graphHeads,
Map<String, DataSet<Vertex>> vertices, Map<String, DataSet<Edge>> edges);

/**
* Creates a logical graph layout from the given graphHead, vertex and edge collections.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/**
* Copyright © 2014 - 2017 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.model.impl.layouts.gve;

import org.apache.flink.api.java.DataSet;
import org.gradoop.common.model.impl.pojo.Edge;
import org.gradoop.common.model.impl.pojo.GraphHead;
import org.gradoop.common.model.impl.pojo.Vertex;
import org.gradoop.flink.model.impl.layouts.common.BaseFactory;

import java.util.Map;
import java.util.Objects;

/**
 * Base class for GVE layout factories.
 *
 * Bundles the argument validation and the {@link GVELayout} construction that is shared by
 * the GVE-based logical graph and graph collection layout factories.
 */
abstract class GVEBaseFactory extends BaseFactory {

  /**
   * Creates a GVE layout from the given datasets.
   *
   * @param graphHeads GraphHead DataSet
   * @param vertices Vertex DataSet
   * @param edges Edge DataSet
   * @return GVE layout
   * @throws NullPointerException if one of the datasets or the factory config is null
   */
  GVELayout create(DataSet<GraphHead> graphHeads, DataSet<Vertex> vertices,
    DataSet<Edge> edges) {
    Objects.requireNonNull(graphHeads, "GraphHead DataSet was null");
    Objects.requireNonNull(vertices, "Vertex DataSet was null");
    Objects.requireNonNull(edges, "Edge DataSet was null");
    Objects.requireNonNull(config, "Config was null");
    return new GVELayout(graphHeads, vertices, edges);
  }

  /**
   * Creates a GVE layout from the given datasets indexed by label.
   *
   * The label index is not retained: all datasets of a map are combined via
   * {@link DataSet#union(DataSet)} into a single dataset per element type.
   *
   * @param graphHeads Mapping from label to graph head dataset
   * @param vertices Mapping from label to vertex dataset
   * @param edges Mapping from label to edge dataset
   * @return GVE layout
   * @throws NullPointerException if one of the maps is null
   * @throws IllegalArgumentException if one of the maps contains no dataset
   */
  GVELayout create(Map<String, DataSet<GraphHead>> graphHeads,
    Map<String, DataSet<Vertex>> vertices, Map<String, DataSet<Edge>> edges) {
    Objects.requireNonNull(graphHeads, "GraphHead DataSet map was null");
    Objects.requireNonNull(vertices, "Vertex DataSet map was null");
    Objects.requireNonNull(edges, "Edge DataSet map was null");

    // Evaluation order (graph heads, vertices, edges) decides which empty map is reported first.
    return new GVELayout(
      unionAll(graphHeads, "graph head"),
      unionAll(vertices, "vertex"),
      unionAll(edges, "edge"));
  }

  /**
   * Combines all datasets of a label index into a single dataset.
   *
   * @param index mapping from label to dataset
   * @param elementName element name used in the error message
   * @param <T> element type of the datasets
   * @return union of all datasets contained in the index
   * @throws IllegalArgumentException if the index contains no dataset
   */
  private static <T> DataSet<T> unionAll(Map<String, DataSet<T>> index, String elementName) {
    // An empty index leaves nothing to reduce; an empty dataset cannot be created here,
    // so the caller is told which argument was empty instead of a generic union error.
    return index.values().stream()
      .reduce(DataSet::union)
      .orElseThrow(() -> new IllegalArgumentException(
        "Cannot create GVE layout: no " + elementName + " dataset given (index is empty)"));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@
import org.gradoop.flink.model.impl.functions.epgm.TransactionGraphHead;
import org.gradoop.flink.model.impl.functions.epgm.TransactionVertices;
import org.gradoop.flink.model.impl.functions.utils.First;
import org.gradoop.flink.model.impl.layouts.common.BaseFactory;
import org.gradoop.flink.model.impl.layouts.transactional.tuples.GraphTransaction;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.Objects;

/**
* Responsible for creating a {@link GVELayout} from given data.
*/
public class GVECollectionLayoutFactory extends BaseFactory implements GraphCollectionLayoutFactory {
public class GVECollectionLayoutFactory extends GVEBaseFactory implements GraphCollectionLayoutFactory {

@Override
public GraphCollectionLayout fromDataSets(DataSet<GraphHead> graphHeads,
Expand All @@ -50,11 +50,13 @@ public GraphCollectionLayout fromDataSets(DataSet<GraphHead> graphHeads,
@Override
public GraphCollectionLayout fromDataSets(DataSet<GraphHead> graphHeads, DataSet<Vertex> vertices,
DataSet<Edge> edges) {
Objects.requireNonNull(graphHeads, "GraphHead DataSet was null");
Objects.requireNonNull(vertices, "Vertex DataSet was null");
Objects.requireNonNull(edges, "Edge DataSet was null");
Objects.requireNonNull(config, "Config was null");
return new GVELayout(graphHeads, vertices, edges);
return create(graphHeads, vertices, edges);
}

/**
 * {@inheritDoc}
 *
 * Delegates to the base factory, which unions the label-indexed datasets into a single
 * dataset per element type and wraps them in a {@link GVELayout}.
 */
@Override
public GraphCollectionLayout fromIndexedDataSets(Map<String, DataSet<GraphHead>> graphHeads,
Map<String, DataSet<Vertex>> vertices, Map<String, DataSet<Edge>> edges) {
return create(graphHeads, vertices, edges);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,20 @@
import org.gradoop.common.model.impl.pojo.Edge;
import org.gradoop.common.model.impl.pojo.GraphHead;
import org.gradoop.common.model.impl.pojo.Vertex;
import org.gradoop.flink.model.api.layouts.LogicalGraphLayout;
import org.gradoop.flink.model.api.layouts.LogicalGraphLayoutFactory;
import org.gradoop.flink.model.impl.functions.graphcontainment.AddToGraph;
import org.gradoop.flink.model.impl.layouts.common.BaseFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
* Responsible for creating a {@link GVELayout} from given data.
*/
public class GVEGraphLayoutFactory extends BaseFactory implements LogicalGraphLayoutFactory {
public class GVEGraphLayoutFactory extends GVEBaseFactory implements LogicalGraphLayoutFactory {

@Override
public GVELayout fromDataSets(DataSet<Vertex> vertices) {
Expand Down Expand Up @@ -64,13 +65,19 @@ public GVELayout fromDataSets(DataSet<Vertex> vertices, DataSet<Edge> edges) {
}

@Override
public GVELayout fromDataSets(DataSet<GraphHead> graphHead, DataSet<Vertex> vertices,
public LogicalGraphLayout fromDataSets(DataSet<GraphHead> graphHead, DataSet<Vertex> vertices,
DataSet<Edge> edges) {
return new GVELayout(graphHead, vertices, edges);
return create(graphHead, vertices, edges);
}

@Override
public GVELayout fromCollections(GraphHead graphHead, Collection<Vertex> vertices,
public LogicalGraphLayout fromIndexedDataSets(Map<String, DataSet<GraphHead>> graphHeads,
Map<String, DataSet<Vertex>> vertices, Map<String, DataSet<Edge>> edges) {
return create(graphHeads, vertices, edges);
}

@Override
public LogicalGraphLayout fromCollections(GraphHead graphHead, Collection<Vertex> vertices,
Collection<Edge> edges) {
List<GraphHead> graphHeads;
if (graphHead == null) {
Expand All @@ -93,7 +100,7 @@ public GVELayout fromCollections(GraphHead graphHead, Collection<Vertex> vertice
}

@Override
public GVELayout fromCollections(Collection<Vertex> vertices, Collection<Edge> edges) {
public LogicalGraphLayout fromCollections(Collection<Vertex> vertices, Collection<Edge> edges) {
Objects.requireNonNull(vertices, "Vertex collection was null");
Objects.requireNonNull(edges, "Edge collection was null");
Objects.requireNonNull(config, "Config was null");
Expand All @@ -113,7 +120,7 @@ public GVELayout fromCollections(Collection<Vertex> vertices, Collection<Edge> e
}

@Override
public GVELayout createEmptyGraph() {
public LogicalGraphLayout createEmptyGraph() {
Collection<Vertex> vertices = new ArrayList<>(0);
Collection<Edge> edges = new ArrayList<>(0);
return fromCollections(null, vertices, edges);
Expand Down
Loading

0 comments on commit e39020c

Please sign in to comment.