Permalink
Browse files

[#1004] create module, package structure and pom for data integration…

… module (#1015)

* fixes [#1004] created module, package structure and pom
* fixes [#1009] added invert edges structural transformation
  • Loading branch information...
merando committed Oct 10, 2018
1 parent 3091922 commit fd103be122047ab3771b9040109b937a5db5e844
@@ -0,0 +1,167 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-parent</artifactId>
<version>0.5.0-SNAPSHOT</version>
</parent>
<artifactId>gradoop-data-integration</artifactId>
<packaging>jar</packaging>
<name>Gradoop Data Integration</name>
<description>Support for graph data integration in gradoop, e.g. importer, transformations and so on.</description>
<profiles>
<!--
Optional build profiles. None of the plugins below declares a version or a
configuration; presumably both are inherited from the pluginManagement section
of gradoop-parent (TODO confirm in the parent pom).
-->
<!-- release: activates javadoc, source, GPG and Nexus staging plugins. -->
<profile>
<id>release</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>
<!-- snapshot: lists exactly the same four plugins as the release profile. -->
<profile>
<id>snapshot</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>
<!-- javadoc: activates the javadoc and site plugins only. -->
<profile>
<id>javadoc</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>
</profiles>
<build>
<!--
Plugins that are part of every build of this module. Versions and most of the
configuration are not declared here; presumably they come from gradoop-parent
(TODO confirm in the parent pom).
-->
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<!-- Static analysis: checkstyle and findbugs -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
</plugin>
<!-- Test execution -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<!-- Creates an extra *-tests.jar which can be used as dependency -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Code coverage -->
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
<dependencies>
<!--
No dependency below declares a version; presumably all versions are managed by
the dependencyManagement section of gradoop-parent (TODO confirm in the parent pom).
The first group has no explicit scope; everything below the "Testing" marker is
test-scoped.
-->
<!-- Gradoop -->
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-flink</artifactId>
</dependency>
<!-- Flink -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
</dependency>
<!-- Testing -->
<!-- Gradoop -->
<!-- The test-jar artifacts of the Gradoop modules are pulled in for test scope only. -->
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-common</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-flink</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<!-- Flink -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-test-utils_2.11</artifactId>
<scope>test</scope>
</dependency>
<!-- Others -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
@@ -0,0 +1,20 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Contains data importers that are capable of importing data from other (non-graph) sources into
* the Gradoop format.
*/
package org.gradoop.dataintegration.importer;
@@ -0,0 +1,62 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.dataintegration.transformation;
import org.apache.flink.util.Preconditions;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.pojo.Edge;
import org.gradoop.flink.model.api.functions.TransformationFunction;
/**
 * Edge transformation that reverses the direction of every edge carrying a given label and
 * assigns a new label to the reversed edge. Edges with any other label are returned unchanged.
 */
public class InvertEdges implements TransformationFunction<Edge> {

  /**
   * Label selecting the edges that get inverted.
   */
  private final String forEdgeLabel;

  /**
   * Label assigned to an edge once it has been inverted.
   */
  private final String newLabel;

  /**
   * Creates the transformation function.
   *
   * @param forEdgeLabel label selecting the edges that get inverted, must not be {@code null}
   * @param newLabel     label assigned to inverted edges, must not be {@code null}
   */
  public InvertEdges(String forEdgeLabel, String newLabel) {
    Preconditions.checkNotNull(forEdgeLabel);
    Preconditions.checkNotNull(newLabel);
    this.forEdgeLabel = forEdgeLabel;
    this.newLabel = newLabel;
  }

  /**
   * Swaps source and target of {@code current} and relabels it if its label matches the
   * configured one. The edge is modified in place; {@code transformed} is not used.
   *
   * @param current     the edge to inspect and possibly invert
   * @param transformed ignored by this implementation
   * @return {@code current}, inverted and relabelled if its label matched, untouched otherwise
   */
  @Override
  public Edge apply(Edge current, Edge transformed) {
    // Guard clause: edges with a different label pass through as they are.
    if (!current.getLabel().equals(forEdgeLabel)) {
      return current;
    }

    // Remember both endpoints before overwriting either of them.
    final GradoopId formerSource = current.getSourceId();
    final GradoopId formerTarget = current.getTargetId();

    current.setSourceId(formerTarget);
    current.setTargetId(formerSource);
    current.setLabel(newLabel);
    return current;
  }
}
@@ -0,0 +1,19 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Contains operators that apply structural changes to a Gradoop graph.
*/
package org.gradoop.dataintegration.transformation;
@@ -0,0 +1,107 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.dataintegration.transformation;
import org.apache.flink.api.java.io.LocalCollectionOutputFormat;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.pojo.Edge;
import org.gradoop.common.model.impl.pojo.Vertex;
import org.gradoop.flink.model.GradoopFlinkTestBase;
import org.gradoop.flink.model.api.epgm.LogicalGraph;
import org.gradoop.flink.model.impl.functions.epgm.ByLabel;
import org.gradoop.flink.model.impl.functions.filters.And;
import org.gradoop.flink.model.impl.functions.filters.Or;
import org.junit.Assert;
import org.junit.Test;
import java.util.*;
/**
 * Tests for the {@link InvertEdges} edge transformation.
 */
public class InvertEdgesTest extends GradoopFlinkTestBase {

  /**
   * Test to ensure that the label of the edges to invert must not be null.
   */
  @Test(expected = NullPointerException.class)
  public void firstNullArgumentTest() {
    new InvertEdges(null, "foo");
  }

  /**
   * Test to ensure that the label of the inverted edges must not be null.
   */
  @Test(expected = NullPointerException.class)
  public void secondNullArgumentTest() {
    new InvertEdges("foo", null);
  }

  /**
   * Test whether edges are inverted correctly.
   *
   * @throws Exception If test data can't be loaded or the Flink job fails.
   */
  @Test
  public void testInvert() throws Exception {
    final String toInvertLabel = "hasInterest";
    final String invertedLabel = "foobar";

    LogicalGraph social = getSocialNetworkLoader().getLogicalGraph();
    InvertEdges invertEdges = new InvertEdges(toInvertLabel, invertedLabel);
    LogicalGraph invertedEdgeGraph = social.transformEdges(invertEdges);

    long edgesBefore = social.getEdges().count();
    long edgesToChange = social.getEdges().filter(new ByLabel<>(toInvertLabel)).count();
    long edgesAfter = invertedEdgeGraph.getEdges().count();
    // JUnit expects (expected, actual); the social network has 4 "hasInterest" edges.
    Assert.assertEquals(4, edgesToChange);
    // Ensures no edges are created or dropped by the transformation.
    Assert.assertEquals(edgesBefore, edgesAfter);

    long oldEdgeCount = invertedEdgeGraph.getEdges().filter(new ByLabel<>(toInvertLabel)).count();
    // No edges with the old label should exist any more.
    Assert.assertEquals(0, oldEdgeCount);

    /*
     * We now have to check whether all of these hasInterest edges are inverted.
     * (eve)-[:hasInterest]->(databases)
     * (alice)-[:hasInterest]->(databases)
     * (frank)-[:hasInterest]->(hadoop)
     * (dave)-[:hasInterest]->(hadoop)
     */
    List<Vertex> vertices = new ArrayList<>();
    // A vertex is either a Person or a Tag, never both, so the filters are combined with Or.
    invertedEdgeGraph.getVertices()
      .filter(new Or<>(new ByLabel<>("Person"), new ByLabel<>("Tag")))
      .output(new LocalCollectionOutputFormat<>(vertices));

    List<Edge> newEdges = new ArrayList<>();
    invertedEdgeGraph
      .getEdgesByLabel(invertedLabel)
      .output(new LocalCollectionOutputFormat<>(newEdges));

    // Sinks are lazy: without an explicit execute() both lists stay empty and the checks
    // below would pass without verifying anything.
    getExecutionEnvironment().execute();

    Assert.assertEquals("unexpected number of inverted edges", edgesToChange, newEdges.size());

    Map<GradoopId, String> idMap = new HashMap<>();
    vertices.forEach(v -> idMap.put(v.getId(), v.getPropertyValue("name").getString()));

    Set<String> tags = new HashSet<>(Arrays.asList("Databases", "Hadoop"));
    Set<String> persons = new HashSet<>(Arrays.asList("Eve", "Alice", "Frank", "Dave"));

    for (Edge e : newEdges) {
      String sourceName = idMap.get(e.getSourceId());
      String targetName = idMap.get(e.getTargetId());

      // After the inversion every edge must point from a tag to a person.
      Assert.assertTrue("source: " + sourceName + " | target: " + targetName,
        tags.contains(sourceName) && persons.contains(targetName));
      // Each person may be the target of exactly one inverted edge.
      persons.remove(targetName);
    }

    Assert.assertTrue("persons without an inverted edge: " + persons, persons.isEmpty());
  }
}
Oops, something went wrong.

0 comments on commit fd103be

Please sign in to comment.