From 781895ce64e062c7f2268a78189a777c39b92844 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Fri, 27 Jan 2017 13:31:59 +0000 Subject: [PATCH 1/4] Improve bulk operations depending on relative sizes of graphs --- .../java/org/apache/jena/graph/GraphUtil.java | 123 ++++++++++-------- 1 file changed, 68 insertions(+), 55 deletions(-) diff --git a/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java b/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java index 149e9327aa3..ef6dc081d8d 100644 --- a/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java +++ b/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java @@ -18,9 +18,11 @@ package org.apache.jena.graph; +import java.util.ArrayList; import java.util.Iterator ; import java.util.List ; import java.util.Set ; + import org.apache.jena.graph.impl.GraphWithPerform ; import org.apache.jena.util.IteratorCollection ; import org.apache.jena.util.iterator.ExtendedIterator ; @@ -115,38 +117,26 @@ public static void add(Graph graph, Triple[] triples) { } public static void add(Graph graph, List triples) { + addIteratorWorkerDirect(graph, triples.iterator()); if ( OldStyle && graph instanceof GraphWithPerform ) - { - GraphWithPerform g = (GraphWithPerform)graph ; - for (Triple t : triples) - g.performAdd(t) ; graph.getEventManager().notifyAddList(graph, triples) ; - } else - { - for (Triple t : triples) - graph.add(t) ; - } } public static void add(Graph graph, Iterator it) { - // Materialize to avoid ConcurrentModificationException. - List s = IteratorCollection.iteratorToList(it) ; - if ( OldStyle && graph instanceof GraphWithPerform ) - { - GraphWithPerform g = (GraphWithPerform)graph ; - for (Triple t : s) - g.performAdd(t) ; + if ( OldStyle && graph instanceof GraphWithPerform ) { + // Materialize for the notify. + List s = IteratorCollection.iteratorToList(it) ; + addIteratorWorkerDirect(graph, s.iterator()); graph.getEventManager().notifyAddIterator(graph, s) ; } - else - { - for (Triple t : s) - graph.add(t) ; - } + else + addIteratorWorker(graph, it); } /** Add triples into the destination (arg 1) from the source (arg 2)*/ public static void addInto(Graph dstGraph, Graph srcGraph ) { + if ( dstGraph == srcGraph ) + return ; dstGraph.getPrefixMapping().setNsPrefixes(srcGraph.getPrefixMapping()) ; addIteratorWorker(dstGraph, GraphUtil.findAll( srcGraph )); dstGraph.getEventManager().notifyAddGraph( dstGraph, srcGraph ); @@ -154,16 +144,17 @@ public static void addInto(Graph dstGraph, Graph srcGraph ) { private static void addIteratorWorker( Graph graph, Iterator it ) { List s = IteratorCollection.iteratorToList( it ); - if ( OldStyle && graph instanceof GraphWithPerform ) - { - GraphWithPerform g = (GraphWithPerform)graph ; - for (Triple t : s ) - g.performAdd(t) ; - } - else - { - for (Triple t : s ) - graph.add(t) ; + addIteratorWorkerDirect(graph, s.iterator()); + } + + private static void addIteratorWorkerDirect( Graph graph, Iterator it ) { + if ( OldStyle && graph instanceof GraphWithPerform ) { + GraphWithPerform g = (GraphWithPerform)graph; + while (it.hasNext()) + g.performAdd(it.next()); + } else { + while (it.hasNext()) + graph.add(it.next()); } } @@ -180,48 +171,70 @@ public static void delete(Graph graph, Triple[] triples) { } public static void delete(Graph graph, List triples) - { - if ( OldStyle && graph instanceof GraphWithPerform ) { - GraphWithPerform g = (GraphWithPerform)graph ; - for ( Triple t : triples ) - g.performDelete(t) ; + { + deleteIteratorWorkerDirect(graph, triples.iterator()); + if ( OldStyle && graph instanceof GraphWithPerform ) graph.getEventManager().notifyDeleteList(graph, triples) ; - } else { - for ( Triple t : triples ) - graph.delete(t) ; - } } public static void delete(Graph graph, Iterator it) { - // Materialize to avoid ConcurrentModificationException. - List s = IteratorCollection.iteratorToList(it) ; if ( OldStyle && graph instanceof GraphWithPerform ) { - GraphWithPerform g = (GraphWithPerform)graph ; - for ( Triple t : s ) - g.performDelete(t) ; + // Materialize for the notify. + List s = IteratorCollection.iteratorToList(it) ; + deleteIteratorWorkerDirect(graph, s.iterator()); graph.getEventManager().notifyDeleteIterator(graph, s) ; - } else { - for ( Triple t : s ) - graph.delete(t) ; - } + } else + deleteIteratorWorker(graph, it); } - /** Delete triples the destination (arg 1) as given in the source (arg 2) */ + /** Delete triples in the destination (arg 1) as given in the source (arg 2) */ public static void deleteFrom(Graph dstGraph, Graph srcGraph) { - deleteIteratorWorker(dstGraph, GraphUtil.findAll(srcGraph)) ; + if ( dstGraph == srcGraph ) { + dstGraph.clear(); + return; + } + if ( dstGraph.size() > srcGraph.size() ) { + // Loop on srcGraph + deleteIteratorWorker(dstGraph, GraphUtil.findAll(srcGraph)) ; + dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ; + return; + } + // dstGraph smaller. + List toBeDeleted = new ArrayList<>(); + // Loop on dstGraph + Iterator iter = dstGraph.find(null, null, null); + for( ; iter.hasNext() ; ) { + Triple t = iter.next(); + if ( srcGraph.contains(t) ) + toBeDeleted.add(t); + } dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ; + deleteIteratorWorkerDirect(dstGraph, toBeDeleted.iterator()); } + /** + * Delete the triples supplied by an iterator. This function is "concurrent + * modification" safe - it internally takes a copy of the iterator. + */ private static void deleteIteratorWorker(Graph graph, Iterator it) { List s = IteratorCollection.iteratorToList(it) ; + deleteIteratorWorkerDirect(graph, s.iterator()); + } + + /** + * Delete the triples supplied by an iterator. This function is not "concurrent + * modification" safe; it assumes it can use the iterator while deleting from the + * graph. + */ + private static void deleteIteratorWorkerDirect(Graph graph, Iterator it) { if ( OldStyle && graph instanceof GraphWithPerform ) { GraphWithPerform g = (GraphWithPerform)graph ; - for ( Triple t : s ) - g.performDelete(t) ; + while(it.hasNext()) + g.performDelete(it.next()) ; } else { - for ( Triple t : s ) - graph.delete(t) ; + while(it.hasNext()) + graph.delete(it.next()); } } From 3bbf5a1604abe6d6664501da66eff477044743c5 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Sat, 28 Jan 2017 12:13:16 +0000 Subject: [PATCH 2/4] Extra tests for bulk addInto/deleteFrom. --- .../jena/graph/test/AbstractTestGraph.java | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/jena-core/src/test/java/org/apache/jena/graph/test/AbstractTestGraph.java b/jena-core/src/test/java/org/apache/jena/graph/test/AbstractTestGraph.java index 912a2575b34..f03651ea3bb 100644 --- a/jena-core/src/test/java/org/apache/jena/graph/test/AbstractTestGraph.java +++ b/jena-core/src/test/java/org/apache/jena/graph/test/AbstractTestGraph.java @@ -343,7 +343,7 @@ public void testBulkUpdate() GraphUtil.delete( g, tripleList ); assertEquals( "graph has original size", initialSize, g.size() ); } - + public void testAddWithReificationPreamble() { Graph g = getGraph(); @@ -625,14 +625,44 @@ public void testBulkAddGraph() Graph triples = graphWith( "this type graph; I type slowly" ); GraphUtil.addInto( g, triples ); L.assertHas( new Object[] {"addGraph", g, triples} ); + testContains( g, triples ); + } + + public void testBulkAddGraph1() { + Graph g1 = graphWith( "pigs might fly; dead can dance" ); + Graph g2 = graphWith( "this type graph" ); + GraphUtil.addInto( g1, g2 ); + testContains( g1, g2 ); } + public void testBulkAddGraph2() { + Graph g1 = graphWith( "this type graph" ); + Graph g2 = graphWith( "pigs might fly; dead can dance" ); + GraphUtil.addInto( g1, g2 ); + testContains( g1, g2); + } + public void testBulkDeleteGraph() { Graph g = getAndRegister( L ); Graph triples = graphWith( "this type graph; I type slowly" ); GraphUtil.deleteFrom( g, triples ); L.assertHas( new Object[] {"deleteGraph", g, triples} ); + testOmits( g, triples ); + } + + public void testBulkDeleteGraph1() { + Graph g1 = graphWith( "pigs might fly; dead can dance" ); + Graph g2 = graphWith( "pigs might fly" ); + GraphUtil.deleteFrom( g1, g2 ); + testOmits( g1, g2); + } + + public void testBulkDeleteGraph2() { + Graph g1 = graphWith( "pigs might fly" ); + Graph g2 = graphWith( "pigs might fly; dead can dance" ); + GraphUtil.deleteFrom( g1, g2 ); + testOmits( g1, g2); } public void testGeneralEvent() From 4a05c045194e90929ff77b8c75f77cf1fca4b286 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Thu, 2 Feb 2017 22:17:35 +0000 Subject: [PATCH 3/4] Use Iterator.forEachRemaining. --- .../main/java/org/apache/jena/graph/GraphUtil.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java b/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java index ef6dc081d8d..9286c1447c2 100644 --- a/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java +++ b/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java @@ -150,11 +150,9 @@ private static void addIteratorWorker( Graph graph, Iterator it ) { private static void addIteratorWorkerDirect( Graph graph, Iterator it ) { if ( OldStyle && graph instanceof GraphWithPerform ) { GraphWithPerform g = (GraphWithPerform)graph; - while (it.hasNext()) - g.performAdd(it.next()); + it.forEachRemaining(g::performAdd); } else { - while (it.hasNext()) - graph.add(it.next()); + it.forEachRemaining(graph::add); } } @@ -230,11 +228,9 @@ private static void deleteIteratorWorker(Graph graph, Iterator it) { private static void deleteIteratorWorkerDirect(Graph graph, Iterator it) { if ( OldStyle && graph instanceof GraphWithPerform ) { GraphWithPerform g = (GraphWithPerform)graph ; - while(it.hasNext()) - g.performDelete(it.next()) ; + it.forEachRemaining(g::performDelete); } else { - while(it.hasNext()) - graph.delete(it.next()); + it.forEachRemaining(graph::delete); } } From 13f1935caf9cdd8c5c6f44872d4dfeaa86e0cb04 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Fri, 3 Feb 2017 22:17:42 +0000 Subject: [PATCH 4/4] Choose to loop on src if 2*|src| < |dst| --- .../java/org/apache/jena/graph/GraphUtil.java | 65 +++++++++++-------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java b/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java index 9286c1447c2..725796d7ebc 100644 --- a/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java +++ b/jena-core/src/main/java/org/apache/jena/graph/GraphUtil.java @@ -19,14 +19,14 @@ package org.apache.jena.graph; import java.util.ArrayList; -import java.util.Iterator ; -import java.util.List ; -import java.util.Set ; +import java.util.Iterator; +import java.util.List; +import java.util.Set; -import org.apache.jena.graph.impl.GraphWithPerform ; -import org.apache.jena.util.IteratorCollection ; -import org.apache.jena.util.iterator.ExtendedIterator ; -import org.apache.jena.util.iterator.WrappedIterator ; +import org.apache.jena.graph.impl.GraphWithPerform; +import org.apache.jena.util.IteratorCollection; +import org.apache.jena.util.iterator.ExtendedIterator; +import org.apache.jena.util.iterator.WrappedIterator; /** An ad-hoc collection of useful code for graphs @@ -102,22 +102,19 @@ public static ExtendedIterator findAll(Graph g) { } public static void add(Graph graph, Triple[] triples) { - if ( OldStyle && graph instanceof GraphWithPerform ) - { + if ( OldStyle && graph instanceof GraphWithPerform ) { GraphWithPerform g = (GraphWithPerform)graph ; for (Triple t : triples ) g.performAdd(t) ; graph.getEventManager().notifyAddArray(graph, triples) ; - } - else - { + } else { for (Triple t : triples ) graph.add(t) ; } } public static void add(Graph graph, List triples) { - addIteratorWorkerDirect(graph, triples.iterator()); + addIteratorWorkerDirect(graph, triples.iterator()) ; if ( OldStyle && graph instanceof GraphWithPerform ) graph.getEventManager().notifyAddList(graph, triples) ; } @@ -135,10 +132,10 @@ public static void add(Graph graph, Iterator it) { /** Add triples into the destination (arg 1) from the source (arg 2)*/ public static void addInto(Graph dstGraph, Graph srcGraph ) { - if ( dstGraph == srcGraph ) + if ( dstGraph == srcGraph && ! dstGraph.getEventManager().listening() ) return ; dstGraph.getPrefixMapping().setNsPrefixes(srcGraph.getPrefixMapping()) ; - addIteratorWorker(dstGraph, GraphUtil.findAll( srcGraph )); + addIteratorWorker(dstGraph, findAll( srcGraph )); dstGraph.getEventManager().notifyAddGraph( dstGraph, srcGraph ); } @@ -156,6 +153,10 @@ private static void addIteratorWorkerDirect( Graph graph, Iterator it ) } } + private static boolean requireEvents(Graph graph) { + return graph.getEventManager().listening() ; + } + public static void delete(Graph graph, Triple[] triples) { if ( OldStyle && graph instanceof GraphWithPerform ) { GraphWithPerform g = (GraphWithPerform)graph ; @@ -168,15 +169,13 @@ public static void delete(Graph graph, Triple[] triples) { } } - public static void delete(Graph graph, List triples) - { + public static void delete(Graph graph, List triples) { deleteIteratorWorkerDirect(graph, triples.iterator()); if ( OldStyle && graph instanceof GraphWithPerform ) graph.getEventManager().notifyDeleteList(graph, triples) ; } - public static void delete(Graph graph, Iterator it) - { + public static void delete(Graph graph, Iterator it) { if ( OldStyle && graph instanceof GraphWithPerform ) { // Materialize for the notify. List s = IteratorCollection.iteratorToList(it) ; @@ -186,29 +185,43 @@ public static void delete(Graph graph, Iterator it) deleteIteratorWorker(graph, it); } + private static int MIN_SRC_SIZE = 1000 ; + private static int DST_SRC_RATIO = 2 ; + /** Delete triples in the destination (arg 1) as given in the source (arg 2) */ public static void deleteFrom(Graph dstGraph, Graph srcGraph) { - if ( dstGraph == srcGraph ) { + boolean events = requireEvents(dstGraph); + + if ( dstGraph == srcGraph && ! events ) { dstGraph.clear(); return; } - if ( dstGraph.size() > srcGraph.size() ) { - // Loop on srcGraph - deleteIteratorWorker(dstGraph, GraphUtil.findAll(srcGraph)) ; + + int dstSize = dstGraph.size(); + int srcSize = srcGraph.size(); + + // Whether to loop on dstGraph or srcGraph. + // Loop on src if: + // * srcGraph is below the threshold MIN_SRC_SIZE (a "Just Do it" number) + // * dstGraph is "much" larger than src where "much" is given by DST_SRC_RATIO + boolean loopOnSrc = (srcSize < MIN_SRC_SIZE || dstSize > DST_SRC_RATIO*srcSize) ; + + if ( loopOnSrc ) { + deleteIteratorWorker(dstGraph, findAll(srcGraph)) ; dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ; return; } - // dstGraph smaller. + // Loop on dstGraph, not srcGraph, but need to use srcGraph.contains on this code path. List toBeDeleted = new ArrayList<>(); // Loop on dstGraph - Iterator iter = dstGraph.find(null, null, null); + Iterator iter = findAll(dstGraph); for( ; iter.hasNext() ; ) { Triple t = iter.next(); if ( srcGraph.contains(t) ) toBeDeleted.add(t); } - dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ; deleteIteratorWorkerDirect(dstGraph, toBeDeleted.iterator()); + dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ; } /**