Permalink
Browse files

[#737] Add apache accumulo store and test case (#759)

* add accumulo store implements and test unit

* update readme, rename confusing class name

* fix spelling mistake of test suite name
  • Loading branch information...
chpengzh authored and merando committed May 18, 2018
1 parent 2a9b652 commit 0b27a64617966f478a6b0557e1b131aeb98238ce
Showing with 4,596 additions and 151 deletions.
  1. +1 −0 .gitignore
  2. +4 −0 .travis.yml
  3. +5 −1 README.md
  4. +37 −0 dev-support/store/accumulo_table_layout
  5. 0 dev-support/{ → store}/hbase_table_layout
  6. +78 −0 gradoop-accumulo/README.md
  7. BIN gradoop-accumulo/gradoop-accumulo.png
  8. +147 −0 gradoop-accumulo/pom.xml
  9. +261 −0 gradoop-accumulo/src/main/java/org/gradoop/common/config/GradoopAccumuloConfig.java
  10. +17 −0 gradoop-accumulo/src/main/java/org/gradoop/common/config/package-info.java
  11. +408 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/AccumuloEPGMStore.java
  12. +71 −0 ...op-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/constants/AccumuloDefault.java
  13. +75 −0 ...oop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/constants/AccumuloTables.java
  14. +20 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/constants/package-info.java
  15. +111 −0 ...-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/handler/AccumuloEdgeHandler.java
  16. +90 −0 ...accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/handler/AccumuloGraphHandler.java
  17. +52 −0 ...p-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/handler/AccumuloRowHandler.java
  18. +103 −0 ...ccumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/handler/AccumuloVertexHandler.java
  19. +16 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/handler/package-info.java
  20. +124 −0 ...src/main/java/org/gradoop/common/storage/impl/accumulo/iterator/client/CacheClosableIterator.java
  21. +28 −0 ...ulo/src/main/java/org/gradoop/common/storage/impl/accumulo/iterator/client/CloseableIterator.java
  22. +16 −0 ...accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/iterator/client/package-info.java
  23. +143 −0 .../src/main/java/org/gradoop/common/storage/impl/accumulo/iterator/tserver/BaseElementIterator.java
  24. +131 −0 .../src/main/java/org/gradoop/common/storage/impl/accumulo/iterator/tserver/GradoopEdgeIterator.java
  25. +122 −0 ...main/java/org/gradoop/common/storage/impl/accumulo/iterator/tserver/GradoopGraphHeadIterator.java
  26. +126 −0 ...rc/main/java/org/gradoop/common/storage/impl/accumulo/iterator/tserver/GradoopVertexIterator.java
  27. +22 −0 ...ccumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/iterator/tserver/package-info.java
  28. +16 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/package-info.java
  29. +66 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/row/EdgeRow.java
  30. +66 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/row/ElementRow.java
  31. +24 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/row/GraphHeadRow.java
  32. +40 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/row/VertexRow.java
  33. +20 −0 gradoop-accumulo/src/main/java/org/gradoop/common/storage/impl/accumulo/row/package-info.java
  34. +95 −0 gradoop-accumulo/src/main/java/org/gradoop/common/utils/JsonUtils.java
  35. +16 −0 gradoop-accumulo/src/main/java/org/gradoop/common/utils/package-info.java
  36. +66 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/AccumuloBase.java
  37. +75 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/AccumuloDataSink.java
  38. +65 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/AccumuloDataSource.java
  39. +209 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/inputformats/BaseInputFormat.java
  40. +89 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/inputformats/EdgeInputFormat.java
  41. +89 −0 ...-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/inputformats/GraphHeadInputFormat.java
  42. +89 −0 ...oop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/inputformats/VertexInputFormat.java
  43. +17 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/inputformats/package-info.java
  44. +152 −0 ...oop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/outputformats/BaseOutputFormat.java
  45. +69 −0 ...oop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/outputformats/EdgeOutputFormat.java
  46. +68 −0 ...ccumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/outputformats/GraphHeadOutputFormat.java
  47. +69 −0 ...p-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/outputformats/VertexOutputFormat.java
  48. +17 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/outputformats/package-info.java
  49. +17 −0 gradoop-accumulo/src/main/java/org/gradoop/flink/io/impl/accumulo/package-info.java
  50. +117 −0 gradoop-accumulo/src/test/java/org/gradoop/AccumuloTestSuite.java
  51. +293 −0 gradoop-accumulo/src/test/java/org/gradoop/common/storage/impl/accumulo/AccumuloGraphStoreTest.java
  52. +16 −0 gradoop-accumulo/src/test/java/org/gradoop/common/storage/impl/accumulo/package-info.java
  53. +143 −0 gradoop-accumulo/src/test/java/org/gradoop/flink/io/impl/accumulo/AccumuloDataSinkSourceTest.java
  54. +17 −0 gradoop-accumulo/src/test/java/org/gradoop/flink/io/impl/accumulo/package-info.java
  55. +17 −0 gradoop-accumulo/src/test/java/org/gradoop/package-info.java
  56. +1 −0 gradoop-accumulo/src/test/resources/gdl/example.gdl
  57. +44 −0 gradoop-accumulo/src/test/resources/gdl/social_network.gdl
  58. +9 −0 gradoop-accumulo/src/test/resources/log4j-test.properties
  59. +29 −40 {gradoop-hbase → gradoop-flink}/src/main/java/org/gradoop/common/config/GradoopStoreConfig.java
  60. +16 −0 gradoop-flink/src/main/java/org/gradoop/common/config/package-info.java
  61. +56 −0 gradoop-flink/src/main/java/org/gradoop/common/storage/api/EPGMConfigProvider.java
  62. +74 −0 gradoop-flink/src/main/java/org/gradoop/common/storage/api/EPGMGraphInput.java
  63. +21 −90 .../EPGMStore.java → gradoop-flink/src/main/java/org/gradoop/common/storage/api/EPGMGraphOutput.java
  64. +44 −0 gradoop-flink/src/main/java/org/gradoop/common/storage/api/EPGMStore.java
  65. +16 −0 gradoop-flink/src/main/java/org/gradoop/common/storage/api/package-info.java
  66. +12 −7 gradoop-hbase/src/main/java/org/gradoop/common/config/GradoopHBaseConfig.java
  67. +13 −13 gradoop-hbase/src/main/java/org/gradoop/common/storage/impl/hbase/HBaseEPGMStore.java
  68. +16 −0 pom.xml
@@ -1,3 +1,4 @@
.DS_Store
.idea
target
output
@@ -1,3 +1,7 @@
# https://docs.travis-ci.com/user/reference/overview/
sudo: required
dist: trusty
cache:
directories:
- $HOME/.m2
@@ -152,9 +152,13 @@ The main contents of that module are the EPGM data model and a corresponding POJ
implementation which is used in Flink™. The persistent representation of the EPGM
is also contained in gradoop-common and together with its mapping to HBase™.
### gradoop-accumulo
Input and output formats for reading and writing graph collections from [Apache Accumulo](https://accumulo.apache.org/).
### gradoop-hbase
Input and output formats for reading and writing graph collections from Apache HBase.
Input and output formats for reading and writing graph collections from [Apache HBase](https://hbase.apache.org/).
### gradoop-flink
@@ -0,0 +1,37 @@
GraphData (Table graph)
----------*-------------*-------------------------*---------------*---------------------
row | cf | cq | timestamp | value
----------*-------------*-------------------------*---------------*---------------------
| label | | | {label}
{id} *-------------*-------------------------*---------------*---------------------
| property | property key | | {property}
----------*-------------*-------------------------*---------------*---------------------
VertexData (Table vertex)
----------*-------------*-------------------------*---------------*---------------------
row | cf | cq | timestamp | value
----------*-------------*-------------------------*---------------*---------------------
| label | | | {label}
*-------------*-------------------------*---------------*---------------------
{id} | property | property key | | {property}
*-------------*-------------------------*---------------*---------------------
| graph | {graph id} | |
----------*-------------*-------------------------*---------------*---------------------
EdgeData (Table edge)
----------*-------------*-------------------------*---------------*---------------------
row | cf | cq | timestamp | value
----------*-------------*-------------------------*---------------*---------------------
| label | | | {label}
*-------------*-------------------------*---------------*---------------------
| source | | | {vertex id}
*-------------*-------------------------*---------------*---------------------
{id} | target | | | {vertex id}
*-------------*-------------------------*---------------*---------------------
| property | property key | | {property}
*-------------*-------------------------*---------------*---------------------
| graph | {graph id} | |
----------*-------------*-------------------------*---------------*---------------------
File renamed without changes.
@@ -0,0 +1,78 @@
# Accumulo Store for Gradoop
[Apache Accumulo](https://accumulo.apache.org/) is a key/value store based on the design of Google's [BigTable](https://research.google.com/archive/bigtable.html). With this adapter implementation you can use Apache Accumulo as DataSource or DataSink for your graph data.
## Adding a Accumulo Runtime Iterator
Run the instructions below to build your accumulo runtime iterator:
```
cd gradoop-accumulo && mvn clean install
```
Then copy `gradoop-accumulo/target/gradoop-accumulo-<ver>.jar` to your accumulo runtime libs
(native
or
hdfs).
If you use a native external lib , just copy it to `$ACCUMULO_HOME/lib/ext`.
For more details about accumulo, please visit [Apache Accumulo](https://accumulo.apache.org/).
## Creation of an Accumulo based Graph-store
```
// flink execution env
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
// create gradoop accumulo configuration
GradoopAccumuloConfig config = GradoopAccumuloConfig.create(env)
.set(GGradoopAccumuloConfig.ACCUMULO_USER, {user})
.set(GGradoopAccumuloConfig.ACCUMULO_INSTANCE, {instance})
.set(GGradoopAccumuloConfig.ZOOKEEPER_HOSTS, {comma separated zookeeper host list})
.set(GGradoopAccumuloConfig.ACCUMULO_PASSWD, {password})
.set(GGradoopAccumuloConfig.ACCUMULO_TABLE_PREFIX, {table prefix});
// create store
AccumuloStore graphStore = new AccumuloStore(config);
```
> let's just add some graph elements
```
graphStore.writeGraphHead(graphHead);
graphStore.wirteVertex(vertex);
graphStore.writeEdge(edge);
graphStore.flush();
```
## Accessing Data
> Example for DataSink & DataSource
```
// data source
DataSource accumuloDataSource = new AccumuloDataSource(config);
GraphCollection result = accumuloDataSource.cypher(
"MATCH (u1:Person)<-[:hasModerator]-(f:Forum)" +
"(u2:Person)<-[:hasMember]-(f)" +
"WHERE u1.name = \"Alice\"");
// data sink
DataSink accumuloSink = new AccumuloDataSink(config);
accumuloSink.write(result);
```
## Store Implement
Here's the data flow about store implement:
![](gradoop-accumulo.png)
- both `read`, `write` operations are spitted into multi ranges, which are pre-defined in accumulo
tserver
- The changes of each `EPGMVertex/EPGMGraph/EPGMEdge` are made atomically.
See [accumulo_table_layout](../dev-support/store/accumulo_table_layout) for more detail
Binary file not shown.
@@ -0,0 +1,147 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>gradoop-parent</artifactId>
<groupId>org.gradoop</groupId>
<version>0.3.3-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gradoop-accumulo</artifactId>
<name>Gradoop Accumulo</name>
<properties>
<accumuloTestSuite>**/AccumuloTestSuite.class</accumuloTestSuite>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<version>3.0.0</version>
<dependencies>
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-checkstyle</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<configuration>
<configLocation>gradoop/checkstyle.xml</configLocation>
<headerLocation>gradoop/LICENSE.txt</headerLocation>
<includeResources>false</includeResources>
<includeTestResources>false</includeTestResources>
<suppressionsLocation>gradoop/checkstyle-suppressions.xml</suppressionsLocation>
<suppressionsFileExpression>checkstyle.suppressions.file
</suppressionsFileExpression>
<failsOnError>true</failsOnError>
<consoleOutput>true</consoleOutput>
</configuration>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<include>${accumuloTestSuite}</include>
</includes>
</configuration>
</plugin>
<!-- Creates an extra *-tests.jar which can be used as dependency -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<!-- Compile dependencies -->
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-flink</artifactId>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
</dependency>
<!-- Accumulo -->
<dependency>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-core</artifactId>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-minicluster</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-common</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.gradoop</groupId>
<artifactId>gradoop-flink</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<!-- Flink -->
<!-- needs to be added even though it is already a test dependency of gradoop-flink
Maven issue has been unresolved for 12(!) years
see https://issues.apache.org/jira/browse/MNG-1378 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-test-utils_2.11</artifactId>
<scope>test</scope>
</dependency>
<!-- Others -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Oops, something went wrong.

0 comments on commit 0b27a64

Please sign in to comment.