Skip to content

Commit

Permalink
apacheGH-1912: GraphMem2
Browse files Browse the repository at this point in the history
New in-memory, general-purpose, non-transactional graphs as successors of GraphMem:
All variants strictly use term-equality and do not support Iterator#remove.
(GraphMem uses value-equality for object nodes)

GraphMem2Legacy:
- Purpose: Use this graph implementation if you want to maintain the 'old' behavior of GraphMem or if your memory constraints
  prevent you from utilizing more memory-intensive solutions.
- Slightly improved performance compared to GraphMem
- Simplified implementation, primarily due to lack of support for Iterator#remove
- The heritage of GraphMem:
  - Same basic structure
  - Same memory consumption
- Also based on HashCommon

GraphMem2Fast:
- Purpose: GraphMem2Fast is a strong candidate for becoming the new default in-memory graph in the upcoming Jena 5, thanks to
  its improved performance and relatively minor increase in memory usage.
- Faster than GraphMem2Legacy (especially Graph#add, Graph#find and Graph#stream)
- Memory consumption is about 6-35% higher than GraphMem2Legacy
- Maps and sets are not based on HashCommon, but use a faster custom alternative (only #remove is a bit slower)
- Benefits from multiple small optimizations
- The heritage of GraphMem:
  - Also uses 3 hash-maps indexed by subjects, predicates, and objects
  - Values of the maps also switch from arrays to hash sets for the triples

GraphMem2Roaring
- Purpose: GraphMem2Roaring is ideal for handling extremely large graphs. If you frequently work with such massive data
  structures, this implementation could be your top choice.
- Graph#contains is faster than GraphMem2Fast
- Better performance than GraphMem2Fast for operations with triple matches for the patterns S_O, SP_, and _PO on large graphs,
  because bit operations are used to find intersecting triples
- Memory consumption is about 7-99% higher than GraphMem2Legacy
- Suitable for really large graphs like bsbm-5m.nt.gz, bsbm-25m.nt.gz, and possibly even larger
- Simple and straightforward implementation
- No heritage of GraphMem
- Internal structure:
  - One indexed hash set (same as GraphMem2Fast uses) that holds all triples
  - Three hash maps indexed by subjects, predicates, and objects with RoaringBitmaps as values
  - The bitmaps contain the indices of the triples in the central hash set

Other Changes:
- org.apache.jena.graph.test.TestGraph
  - added GraphMem2Fast, GraphMem2Legacy and GraphMem2Roaring to the suite
- GraphMem:
  - moved property "TripleStore store" from GraphMemBase to GraphMem
    --> needed this to make a clean GraphMem2, which also extends GraphMem but the TripleStore interface is slightly different.
- pom.xml:
  - added dependency roaringbitmap 0.9.44
- jena-benchmarks-jmh
  - added the three new graph implementations to the benchmarks
  - randomized the order of test data in some benchmarks to prevent them from showing order-dependent behaviour
  - added benchmarks for sets and maps comparing
    - HashCommonSet vs. FastHashSet vs. Java HashSet
    - HashCommonMap vs. FastHashMap vs. Java HashMap
  • Loading branch information
bern-SOPTIM committed Jun 25, 2023
1 parent f047603 commit 094aa0a
Show file tree
Hide file tree
Showing 104 changed files with 11,779 additions and 439 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,17 @@ public class TestGraphAdd {

@Param({
"GraphMem (current)",
"GraphMem2Fast (current)",
"GraphMem2Legacy (current)",
"GraphMem2Roaring (current)",
"GraphMem (Jena 4.8.0)",
})
public String param1_GraphImplementation;

java.util.function.Supplier<Object> graphAdd;
private Context trialContext;

private List<Triple> triplesCurrent;
private List<org.apache.shadedJena480.graph.Triple> triples480;

java.util.function.Supplier<Object> graphAdd;

@Benchmark
public Object graphAdd() {
return graphAdd.get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.runner.Runner;

import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.function.Predicate;

@State(Scope.Benchmark)
public class TestGraphContainsAnything {


@Param({
"../testing/cheeses-0.1.ttl",
"../testing/pizza.owl.rdf",
Expand All @@ -43,18 +46,18 @@ public class TestGraphContainsAnything {

@Param({
"GraphMem (current)",
"GraphMem2Fast (current)",
"GraphMem2Legacy (current)",
"GraphMem2Roaring (current)",
"GraphMem (Jena 4.8.0)",
})
public String param1_GraphImplementation;

java.util.function.Function<String, Boolean> graphContains;
private Graph sutCurrent;
private org.apache.shadedJena480.graph.Graph sut480;

private List<Triple> triplesToFindCurrent;
private List<org.apache.shadedJena480.graph.Triple> triplesToFind480;

java.util.function.Function<String, Boolean> graphContains;

@Benchmark
public boolean graphContainsS__() {
return graphContains.apply("S__");
Expand All @@ -65,6 +68,11 @@ public boolean graphContains_P_() {
return graphContains.apply("_P_");
}

/**
 * JMH benchmark entry point: invokes the configured {@code graphContains} function with the
 * pattern string {@code "__O"}.
 *
 * @return the value produced by the {@code graphContains} function for that pattern
 */
@Benchmark
public boolean graphContains__O() {
    final boolean result = graphContains.apply("__O");
    return result;
}

@Benchmark
public boolean graphContainsSP_() {
return graphContains.apply("SP_");
Expand All @@ -80,15 +88,12 @@ public boolean graphContains_PO() {
return graphContains.apply("_PO");
}

/**
 * JMH benchmark entry point: invokes the configured {@code graphContains} function with the
 * pattern string {@code "__O"}.
 *
 * @return the value produced by the {@code graphContains} function for that pattern
 */
@Benchmark
public boolean graphContains__O() {
    final boolean result = graphContains.apply("__O");
    return result;
}


private boolean graphContainsCurrent(String pattern) {
var containsPredicate = getContainsPredicateByPatternCurrent(pattern);
var found = false;
for(var t: triplesToFindCurrent) {
for (var t : triplesToFindCurrent) {
found = containsPredicate.test(t);
Assert.assertTrue(found);
}
Expand All @@ -98,7 +103,7 @@ private boolean graphContainsCurrent(String pattern) {
private boolean graphContains480(String pattern) {
var containsPredicate = getContainsPredicateByPattern480(pattern);
var found = false;
for(var t: triplesToFind480) {
for (var t : triplesToFind480) {
found = containsPredicate.test(t);
Assert.assertTrue(found);
}
Expand Down Expand Up @@ -147,30 +152,34 @@ Predicate<org.apache.shadedJena480.graph.Triple> getContainsPredicateByPattern48
public void setupTrial() throws Exception {
var trialContext = new Context(param1_GraphImplementation);
switch (trialContext.getJenaVersion()) {
case CURRENT:
{
this.sutCurrent = Releases.current.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContainsCurrent;

var triples = Releases.current.readTriples(param0_GraphUri);
triples.forEach(this.sutCurrent::add);

/*clone the triples because they should not be the same objects*/
this.triplesToFindCurrent = Releases.current.cloneTriples(triples);
}
break;
case JENA_4_8_0:
{
this.sut480 = Releases.v480.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContains480;

var triples = Releases.v480.readTriples(param0_GraphUri);
triples.forEach(this.sut480::add);

/*clone the triples because they should not be the same objects*/
this.triplesToFind480 = Releases.v480.cloneTriples(triples);
}
break;
case CURRENT: {
this.sutCurrent = Releases.current.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContainsCurrent;

var triples = Releases.current.readTriples(param0_GraphUri);
triples.forEach(this.sutCurrent::add);

/*clone the triples because they should not be the same objects*/
this.triplesToFindCurrent = Releases.current.cloneTriples(triples);

/* Shuffle is important because the order might play a role. We want to test the performance of the
contains method regardless of the order */
Collections.shuffle(this.triplesToFindCurrent, new Random(4721));
}
break;
case JENA_4_8_0: {
this.sut480 = Releases.v480.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContains480;

var triples = Releases.v480.readTriples(param0_GraphUri);
triples.forEach(this.sut480::add);

/* Clone the triples because they should not be the same objects, then shuffle them: the order might
play a role, and we want to test the performance of the contains method regardless of the order */
this.triplesToFind480 = Releases.v480.cloneTriples(triples);
Collections.shuffle(this.triplesToFind480, new Random(4721));
}
break;
default:
throw new IllegalArgumentException("Unknown Jena version: " + trialContext.getJenaVersion());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.openjdk.jmh.runner.Runner;

import java.util.List;
import java.util.Random;


@State(Scope.Benchmark)
Expand All @@ -42,27 +43,27 @@ public class TestGraphContainsTriple {
public String param0_GraphUri;

@Param({
"GraphMem (Jena 4.8.0)",
"GraphMem (current)",
"GraphMem2Fast (current)",
"GraphMem2Legacy (current)",
"GraphMem2Roaring (current)",
"GraphMem (Jena 4.8.0)",
})
public String param1_GraphImplementation;

java.util.function.Supplier<Boolean> graphContains;
private Graph sutCurrent;
private org.apache.shadedJena480.graph.Graph sut480;

private List<Triple> triplesToFindCurrent;
private List<org.apache.shadedJena480.graph.Triple> triplesToFind480;

java.util.function.Supplier<Boolean> graphContains;

/**
 * JMH benchmark entry point: runs whichever contains-check supplier was assigned to the
 * {@code graphContains} field and returns its result.
 *
 * @return the value produced by the {@code graphContains} supplier
 */
@Benchmark
public boolean graphContains() {
    final boolean result = graphContains.get();
    return result;
}

private boolean graphContainsCurrent() {
var found = false;
for(var t: triplesToFindCurrent) {
for (var t : triplesToFindCurrent) {
found = sutCurrent.contains(t);
Assert.assertTrue(found);
}
Expand All @@ -71,7 +72,7 @@ private boolean graphContainsCurrent() {

private boolean graphContains480() {
var found = false;
for(var t: triplesToFind480) {
for (var t : triplesToFind480) {
found = sut480.contains(t);
Assert.assertTrue(found);
}
Expand All @@ -83,30 +84,34 @@ private boolean graphContains480() {
public void setupTrial() throws Exception {
var trialContext = new Context(param1_GraphImplementation);
switch (trialContext.getJenaVersion()) {
case CURRENT:
{
this.sutCurrent = Releases.current.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContainsCurrent;

var triples = Releases.current.readTriples(param0_GraphUri);
triples.forEach(this.sutCurrent::add);

/*clone the triples because they should not be the same objects*/
this.triplesToFindCurrent = Releases.current.cloneTriples(triples);
}
break;
case JENA_4_8_0:
{
this.sut480 = Releases.v480.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContains480;

var triples = Releases.v480.readTriples(param0_GraphUri);
triples.forEach(this.sut480::add);

/*clone the triples because they should not be the same objects*/
this.triplesToFind480 = Releases.v480.cloneTriples(triples);
}
break;
case CURRENT: {
this.sutCurrent = Releases.current.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContainsCurrent;

var triples = Releases.current.readTriples(param0_GraphUri);
triples.forEach(this.sutCurrent::add);

/*clone the triples because they should not be the same objects*/
this.triplesToFindCurrent = Releases.current.cloneTriples(triples);
/* Shuffle is important because the order might play a role. We want to test the performance of the
contains method regardless of the order */
java.util.Collections.shuffle(this.triplesToFindCurrent, new Random(4721));
}
break;
case JENA_4_8_0: {
this.sut480 = Releases.v480.createGraph(trialContext.getGraphClass());
this.graphContains = this::graphContains480;

var triples = Releases.v480.readTriples(param0_GraphUri);
triples.forEach(this.sut480::add);

/*clone the triples because they should not be the same objects*/
this.triplesToFind480 = Releases.v480.cloneTriples(triples);
/* Shuffle is important because the order might play a role. We want to test the performance of the
contains method regardless of the order */
java.util.Collections.shuffle(this.triplesToFind480, new Random(4721));
}
break;
default:
throw new IllegalArgumentException("Unknown Jena version: " + trialContext.getJenaVersion());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.openjdk.jmh.runner.Runner;

import java.util.List;
import java.util.Random;

@State(Scope.Benchmark)
public class TestGraphDelete {
Expand All @@ -42,23 +43,21 @@ public class TestGraphDelete {

@Param({
"GraphMem (current)",
"GraphMem2Fast (current)",
"GraphMem2Legacy (current)",
"GraphMem2Roaring (current)",
"GraphMem (Jena 4.8.0)",
})
public String param1_GraphImplementation;
java.util.function.Supplier<Integer> graphDelete;
private Context trialContext;

private Graph sutCurrent;
private org.apache.shadedJena480.graph.Graph sut480;

private List<Triple> allTriplesCurrent;
private List<org.apache.shadedJena480.graph.Triple> allTriples480;

private List<Triple> triplesToDeleteFromSutCurrent;
private List<org.apache.shadedJena480.graph.Triple> triplesToDeleteFromSut480;


java.util.function.Supplier<Integer> graphDelete;

@Benchmark
public int graphDelete() {
return graphDelete.get();
Expand All @@ -84,13 +83,19 @@ public void setupInvocation() {
this.allTriplesCurrent.forEach(this.sutCurrent::add);
/*cloning is important so that the triples are not reference equal */
this.triplesToDeleteFromSutCurrent = Releases.current.cloneTriples(this.allTriplesCurrent);
/* Shuffle is important because the order might play a role. We want to test the performance of the
delete method regardless of the order */
java.util.Collections.shuffle(this.triplesToDeleteFromSutCurrent, new Random(4721));
break;

case JENA_4_8_0:
this.sut480 = Releases.v480.createGraph(this.trialContext.getGraphClass());
this.allTriples480.forEach(this.sut480::add);
/*cloning is important so that the triples are not reference equal */
this.triplesToDeleteFromSut480 = Releases.v480.cloneTriples(this.allTriples480);
/* Shuffle is important because the order might play a role. We want to test the performance of the
delete method regardless of the order */
java.util.Collections.shuffle(this.triplesToDeleteFromSut480, new Random(4721));
break;

default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,16 @@ public class TestGraphFindAllWithForEachRemaining {

@Param({
"GraphMem (current)",
"GraphMem2Fast (current)",
"GraphMem2Legacy (current)",
"GraphMem2Roaring (current)",
"GraphMem (Jena 4.8.0)",
})
public String param1_GraphImplementation;
java.util.function.Supplier<Long> graphFindAll;
private Graph sutCurrent;
private org.apache.shadedJena480.graph.Graph sut480;

java.util.function.Supplier<Long> graphFindAll;

@Benchmark
public Long graphFindAll() {
return graphFindAll.get();
Expand Down Expand Up @@ -78,24 +80,22 @@ private Long graphFindAll480() {
public void setupTrial() throws Exception {
Context trialContext = new Context(param1_GraphImplementation);
switch (trialContext.getJenaVersion()) {
case CURRENT:
{
this.sutCurrent = Releases.current.createGraph(trialContext.getGraphClass());
this.graphFindAll = this::graphFindAllCurrent;
case CURRENT: {
this.sutCurrent = Releases.current.createGraph(trialContext.getGraphClass());
this.graphFindAll = this::graphFindAllCurrent;

var triples = Releases.current.readTriples(param0_GraphUri);
triples.forEach(this.sutCurrent::add);
}
break;
case JENA_4_8_0:
{
this.sut480 = Releases.v480.createGraph(trialContext.getGraphClass());
this.graphFindAll = this::graphFindAll480;
var triples = Releases.current.readTriples(param0_GraphUri);
triples.forEach(this.sutCurrent::add);
}
break;
case JENA_4_8_0: {
this.sut480 = Releases.v480.createGraph(trialContext.getGraphClass());
this.graphFindAll = this::graphFindAll480;

var triples = Releases.v480.readTriples(param0_GraphUri);
triples.forEach(this.sut480::add);
}
break;
var triples = Releases.v480.readTriples(param0_GraphUri);
triples.forEach(this.sut480::add);
}
break;
default:
throw new IllegalArgumentException("Unknown Jena version: " + trialContext.getJenaVersion());
}
Expand Down

0 comments on commit 094aa0a

Please sign in to comment.