From ec02e99e264694817244fa815933f4cc377bb51c Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Thu, 15 Sep 2016 12:34:14 +0100 Subject: [PATCH] JENA-1235: Filter placement and OpDisjunction --- .../optimize/TransformFilterPlacement.java | 79 +++++++++++++++---- .../org/apache/jena/sparql/expr/ExprList.java | 2 + .../algebra/optimize/TestOptimizer.java | 28 +++++++ .../TestTransformFilterPlacement.java | 56 ++++++++++++- 4 files changed, 149 insertions(+), 16 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/TransformFilterPlacement.java b/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/TransformFilterPlacement.java index 43c15ed5199..1ff753755e4 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/TransformFilterPlacement.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/TransformFilterPlacement.java @@ -43,7 +43,7 @@ * variables. *

Process BGP (whether triples or quads) is left as a separate step (but after this transform) * because it can desirable to reorder the BGP before placing filters, - * or afterwards. + * or afterwards. */ public class TransformFilterPlacement extends TransformCopy { @@ -194,6 +194,8 @@ else if ( input instanceof OpJoin ) placement = placeJoin(exprs, (OpJoin)input) ; else if ( input instanceof OpConditional ) placement = placeConditional(exprs, (OpConditional)input) ; + else if ( input instanceof OpDisjunction ) + placement = placeDisjunction(exprs, (OpDisjunction)input) ; else if ( input instanceof OpLeftJoin ) placement = placeLeftJoin(exprs, (OpLeftJoin)input) ; else if ( input instanceof OpFilter ) @@ -584,12 +586,10 @@ private Placement placeLeftJoin(ExprList exprs, OpLeftJoin opLeftJoin) { } private Placement placeUnion(ExprList exprs, OpUnion input) { - if ( false ) - // Safely but inefficiently do nothing. - return null ; //new Placement(input, exprs) ; - if ( false ) { - // Push into both sides. + // Push into both sides without thinking. + // Left as a safety fallback. + Op left = input.getLeft() ; Placement pLeft = transform(exprs, left) ; @@ -655,11 +655,64 @@ private Placement placeUnion(ExprList exprs, OpUnion input) { if ( exprs2 == null ) exprs2 = emptyList ; - Op op2 = OpUnion.create(newLeft, newRight) ; + //Op op2 = OpUnion.create(newLeft, newRight) ; + Op op2 = input.copy(newLeft, newRight) ; return result(op2, exprs2) ; } - /** Try to optimize (filter (extend ...)) */ + private Placement placeDisjunction(ExprList exprs, OpDisjunction input) { + // Do on each arm. + // better (neater) would be to pass out exprs not placed anywhere. + // Combine with union. + + if ( false ) { + // Push everything, always + // Left as a safty fall back. + List x = new ArrayList<>() ; + input.getElements().forEach(op->{ + Placement p = transform(exprs, op) ; + if ( isNoChange(p) ) { + x.add(buildFilter(exprs, op)) ; + } else { + Op op1 = buildFilter(p) ; + x.add(op1) ; + } + }); + return result(input.copy(x), emptyList) ; + } + + // Don't push any expressions that aren't used in any of the arms of the disjunction. + // This is more about being tidy. + List unplaced = new ArrayList<>(exprs.getList()) ; + //List x = input.getElements().stream().map(op->transform(exprs, op)).collect(Collectors.toList()) ; + List placements = new ArrayList<>(exprs.size()) ; + Boolean someChange = Boolean.FALSE ; + for ( Op op : input.getElements() ) { + Placement p = transform(exprs, op) ; + if ( isChange(p) ) { + unplaced.retainAll(p.unplaced.getList()) ; + someChange = Boolean.TRUE ; + } else + p = result(op, exprs) ; + placements.add(p) ; + }; + + if ( ! someChange ) + return noChangePlacement ; + + List retained = new ArrayList<>(exprs.getList()) ; + retained.removeAll(unplaced) ; + + // Mutate placements to remove the expres going outside. + List ops = new ArrayList<>(input.size()) ; + for ( Placement p : placements ) { + // No "noChange" at this point. + p.unplaced.getListRaw().removeAll(unplaced) ; + ops.add(buildFilter(p)) ; + } ; + return result(input.copy(ops), new ExprList(unplaced)) ; + } + private Placement placeExtend(ExprList exprs, OpExtend input) { return processExtendAssign(exprs, input) ; } @@ -668,6 +721,7 @@ private Placement placeAssign(ExprList exprs, OpAssign input) { return processExtendAssign(exprs, input) ; } + /** Try to optimize (filter (extend ...)) , (filter (let ...)) */ private Placement processExtendAssign(ExprList exprs, OpExtendAssign input) { // We assume that each (extend) and (assign) is usually in simple form - // always one assignment. We cope with the general form (multiple @@ -790,11 +844,7 @@ private Set fixedVars(Op op) { return OpVars.fixedVars(op) ; } - /** For any expression now in scope, wrap the op with a filter. - * Caution - the ExprList is an in-out argument which is modified. - * This function modifies ExprList passed in to remove any filter - * that is placed. - */ + /** For any expression now in scope, wrap the op with a filter. */ private static Op insertAnyFilter$(ExprList unplacedExprs, Collection patternVarsScope, Op op) { for (Iterator iter = unplacedExprs.iterator(); iter.hasNext();) { @@ -829,9 +879,8 @@ private static Op buildFilter(ExprList exprs, Op op) { return op ; for ( Expr expr : exprs ) { - if ( op == null ) { + if ( op == null ) op = OpTable.unit() ; - } op = OpFilter.filter(expr, op) ; } return op ; diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/ExprList.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/ExprList.java index a0c88630eef..297958c191f 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/ExprList.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/ExprList.java @@ -104,6 +104,8 @@ public ExprList copySubstitute(Binding binding) { public void addAll(ExprList exprs) { expressions.addAll(exprs.getList()) ; } public void add(Expr expr) { expressions.add(expr) ; } public List getList() { return Collections.unmodifiableList(expressions) ; } + /** Use only while building ExprList */ + public List getListRaw() { return expressions ; } @Override public Iterator iterator() { return expressions.iterator() ; } diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestOptimizer.java b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestOptimizer.java index b8656d0bb66..94c0b8d82d3 100644 --- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestOptimizer.java +++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestOptimizer.java @@ -227,6 +227,34 @@ public class TestOptimizer extends AbstractTestTransform check(queryString, opExpectedString) ; } + // JENA-1235 + @Test public void optimize_02() { + String in = StrUtils.strjoinNL + ( + "(filter (exprlist (|| (= ?var3 'ABC') (= ?var3 'XYZ')) (&& (regex ?var4 'pat1') (!= ?VAR 123)))" + ," (bgp" + ," (triple ?var2 :p1 ?var4)" + ," (triple ?var2 :p2 ?var3)" + ," ))") ; + + String out = StrUtils.strjoinNL + ("(filter (!= ?VAR 123)" + ," (disjunction" + ," (assign ((?var3 'ABC'))" + ," (sequence" + ," (filter (regex ?var4 'pat1')" + ," (bgp (triple ?var2 :p1 ?var4)))" + ," (bgp (triple ?var2 :p2 'ABC'))))" + ," (assign ((?var3 'XYZ'))" + ," (sequence" + ," (filter (regex ?var4 'pat1')" + ," (bgp (triple ?var2 :p1 ?var4)))" + ," (bgp (triple ?var2 :p2 'XYZ'))))))" + ) ; + checkAlgebra(in, out) ; + } + + @Test public void combine_extend_01() { Op extend = OpExtend.create(OpTable.unit(), new VarExprList(Var.alloc("x"), new NodeValueInteger(1))); diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestTransformFilterPlacement.java b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestTransformFilterPlacement.java index f06ce3e4dfa..494abc2b7df 100644 --- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestTransformFilterPlacement.java +++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/optimize/TestTransformFilterPlacement.java @@ -80,7 +80,6 @@ public class TestTransformFilterPlacement extends BaseTest { //extends AbstractT testNoBGP("(filter (= ?x 123) (bgp (?s ?p ?x) (?s ?p ?x1) (?s ?p ?x2)) )", null) ; } - @Test public void place_bgp_06() { testNoChange("(filter (isURI ?g) (quadpattern (?g ?s ?p ?o) ))") ; @@ -811,6 +810,61 @@ public class TestTransformFilterPlacement extends BaseTest { //extends AbstractT test( in, out ) ; } + // JENA-1235 + @Test public void place_disjunction_01() { + String in = StrUtils.strjoinNL + ( + "(filter (exprlist (!= ?VAR 123) (regex ?var4 'pat1'))" + ," (disjunction" + ," (bgp (?var2 :p1 ?var4) (?var2 :p2 ?var3))" + ," (bgp (?var2 :p3 ?var4) (?var2 :p4 ?var3))" + ," ))" + ) ; + + String out = StrUtils.strjoinNL + ("(filter (!= ?VAR 123)" + ," (disjunction" + ," (sequence" + ," (filter (regex ?var4 'pat1')" + ," (bgp (triple ?var2 :p1 ?var4)))" + ," (bgp (triple ?var2 :p2 ?var3)))" + ," (sequence" + ," (filter (regex ?var4 'pat1')" + ," (bgp (triple ?var2 :p3 ?var4)))" + ," (bgp (triple ?var2 :p4 ?var3)))))" + ) ; + test( in, out ) ; + } + + // Push in, push and wrap, wrap. + @Test public void place_disjunction_02() { + String in = StrUtils.strjoinNL + ( + "(filter (exprlist (!= ?VAR 123) (regex ?var4 'pat1'))" + ," (disjunction" + ," (bgp (?var2 :p1 ?var4) (?var2 :p2 ?var3))" + ," (bgp (?var2 :p4 ?var3) (?var2 :p3 ?var4))" + ," (bgp (?var2 :p4 ?var9))" + ," ))" + ) ; + String out = StrUtils.strjoinNL + ("(filter (!= ?VAR 123)" + ," (disjunction" + ," (sequence" + ," (filter (regex ?var4 'pat1')" + ," (bgp (triple ?var2 :p1 ?var4)))" + ," (bgp (triple ?var2 :p2 ?var3)))" + ," (filter (regex ?var4 'pat1')" + ," (bgp" + ," (triple ?var2 :p4 ?var3)" + ," (triple ?var2 :p3 ?var4)" + ," ))" + ," (filter (regex ?var4 'pat1')" + ," (bgp (triple ?var2 :p4 ?var9)))))" + ) ; + test( in, out ) ; + } + private static String propertyFunctionURI = "http://example/PF" ; // Dummy property private static PropertyFunction dummyPropertyFunction = new PropertyFunction() {