diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/OptimizerStd.java b/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/OptimizerStd.java index 264a381634b..76fdb52dd09 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/OptimizerStd.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/algebra/optimize/OptimizerStd.java @@ -40,21 +40,21 @@ public OptimizerStd(Context context) { /** Alternative name for compatibility only */ public static final Symbol filterPlacementOldName = SystemARQ.allocSymbol("filterPlacement") ; - + @Override public Op rewrite(Op op) { // Record optimizer if ( context.get(ARQConstants.sysOptimizer) == null ) context.set(ARQConstants.sysOptimizer, this) ; - + // Old name, new name fixup. if ( context.isDefined(filterPlacementOldName) ) { if ( context.isUndef(ARQ.optFilterPlacement) ) context.set(ARQ.optFilterPlacement, context.get(filterPlacementOldName)) ; } - + if ( false ) { - // Removal of "group of one" join (AKA SPARQL "simplification") + // Removal of "group of one" join (AKA SPARQL "simplification") // is done during algebra generation in AlgebraGenerator op = apply("Simplify", new TransformSimplify(), op) ; op = apply("Delabel", new TransformRemoveLabels(), op) ; @@ -62,13 +62,13 @@ public Op rewrite(Op op) { // ** TransformScopeRename // This is a requirement for the linearization execution that the default - // ARQ query engine uses where possible. - // This transformation must be done (e.g. by QueryEngineBase) if no other optimization is done. + // ARQ query engine uses where possible. + // This transformation must be done (e.g. by QueryEngineBase) if no other optimization is done. op = TransformScopeRename.transform(op) ; - + // Prepare expressions. OpWalker.walk(op, new OpVisitorExprPrepare(context)) ; - + // Convert paths to triple patterns if possible. 
if ( context.isTrueOrUndef(ARQ.optPathFlatten) ) { op = apply("Path flattening", new TransformPathFlattern(), op) ; @@ -80,11 +80,11 @@ public Op rewrite(Op op) { // Having done the required transforms, specifically TransformScopeRename, // do each optimization via an overrideable method. Subclassing can modify the // transform performed. - + // Expression constant folding if ( context.isTrueOrUndef(ARQ.optExprConstantFolding) ) op = transformExprConstantFolding(op) ; - + if ( context.isTrueOrUndef(ARQ.propertyFunctions) ) op = transformPropertyFunctions(op) ; @@ -95,9 +95,9 @@ public Op rewrite(Op op) { // Expand IN and NOT IN which then allows other optimizations to be applied. if ( context.isTrueOrUndef(ARQ.optFilterExpandOneOf) ) op = transformFilterExpandOneOf(op) ; - + // Eliminate/Inline assignments where possible - // Do this before we do some of the filter transformation work as inlining assignments + // Do this before we do some of the filter transformation work as inlining assignments // may give us more flexibility in optimizing the resulting filters if ( context.isTrue(ARQ.optInlineAssignments) ) op = transformInlineAssignments(op) ; @@ -105,22 +105,22 @@ public Op rewrite(Op op) { // Apply some general purpose filter transformations if ( context.isTrueOrUndef(ARQ.optFilterImplicitJoin) ) op = transformFilterImplicitJoin(op) ; - + if ( context.isTrueOrUndef(ARQ.optImplicitLeftJoin) ) op = transformFilterImplicitLeftJoin(op) ; - + if ( context.isTrueOrUndef(ARQ.optFilterDisjunction) ) op = transformFilterDisjunction(op) ; - + // Some ORDER BY-LIMIT N queries can be done more efficiently by only recording // the top N items, so a full sort is not needed. 
if ( context.isTrueOrUndef(ARQ.optTopNSorting) ) op = transformTopNSorting(op) ; - + // ORDER BY+DISTINCT optimizations // We apply the one that changes evaluation order first since when it does apply it will give much // better performance than just transforming DISTINCT to REDUCED - + if ( context.isTrueOrUndef(ARQ.optOrderByDistinctApplication) ) op = transformOrderByDistinctApplication(op) ; @@ -128,11 +128,11 @@ public Op rewrite(Op op) { // Reduces memory consumption. if ( context.isTrueOrUndef(ARQ.optDistinctToReduced) ) op = transformDistinctToReduced(op) ; - + // Find joins/leftJoin that can be done by index joins (generally preferred as fixed memory overhead). if ( context.isTrueOrUndef(ARQ.optIndexJoinStrategy) ) op = transformJoinStrategy(op) ; - + // Place filters close to where their dependency variables are defined. // This prunes the output of that step as early as possible. // If done before TransformJoinStrategy, you can get two applications @@ -140,20 +140,20 @@ public Op rewrite(Op op) { // because filters are generally cheap, but it looks a bit bad. if ( context.isTrueOrUndef(ARQ.optFilterPlacement) ) op = transformFilterPlacement(op) ; - - // Replace suitable FILTER(?x = TERM) with (assign) and write the TERM for ?x in the pattern. + + // Replace suitable FILTER(?x = TERM) with (assign) and write the TERM for ?x in the pattern. // Apply (possible a second time) after FILTER placement as it can create new possibilities. // See JENA-616. 
if ( context.isTrueOrUndef(ARQ.optFilterEquality) ) op = transformFilterEquality(op) ; - + // Replace suitable FILTER(?x != TERM) with (minus (original) (table)) where the table contains // the candidate rows to be eliminated // Off by default due to minimal performance difference if ( context.isTrue(ARQ.optFilterInequality) ) op = transformFilterInequality(op) ; - - // Promote table empty as late as possible since this will only be produced by other + + // Promote table empty as late as possible since this will only be produced by other // optimizations and never directly from algebra generation if ( context.isTrueOrUndef(ARQ.optPromoteTableEmpty) ) op = transformPromoteTableEmpty(op) ; @@ -165,17 +165,17 @@ public Op rewrite(Op op) { // Normally, leave to the specific engines. if ( context.isTrue(ARQ.optReorderBGP) ) op = transformReorder(op) ; - + // Merge (extend) and (assign) stacks if ( context.isTrueOrUndef(ARQ.optMergeExtends) ) op = transformExtendCombine(op) ; - + // Mark if ( false ) op = OpLabel.create("Transformed", op) ; return op ; } - + protected Op transformExprConstantFolding(Op op) { return Transformer.transform(new TransformCopy(), new ExprTransformConstantFold(), op); } @@ -226,8 +226,8 @@ protected Op transformJoinStrategy(Op op) { protected Op transformFilterPlacement(Op op) { if ( context.isTrue(ARQ.optFilterPlacementConservative)) op = apply("Filter Placement (conservative)", new TransformFilterPlacementConservative(), op) ; - else { - // Whether to push into BGPs + else { + // Whether to push into BGPs boolean b = context.isTrueOrUndef(ARQ.optFilterPlacementBGP) ; op = apply("Filter Placement", new TransformFilterPlacement(b), op) ; } @@ -264,32 +264,33 @@ public static Op apply(Transform transform, Op op) { return op2 ; return op ; } - + + private static final boolean debug = false ; + private static final boolean printNoAction = false; + public static Op apply(String label, Transform transform, Op op) { Op op2 = 
Transformer.transformSkipService(transform, op) ; - - final boolean debug = false ; + + if ( debug ) { - if ( label != null && log.isInfoEnabled() ) + if ( printNoAction && label != null ) log.info("Transform: " + label) ; if ( op == op2 ) { - if ( log.isInfoEnabled() ) + if ( printNoAction ) log.info("No change (==)") ; return op2 ; } - if ( op.equals(op2) ) { - if ( log.isInfoEnabled() ) + if ( printNoAction ) log.info("No change (equals)") ; return op2 ; } - if ( log.isInfoEnabled() ) { - log.info("\n" + op.toString()) ; - log.info("\n" + op2.toString()) ; - } + + if ( ! printNoAction && label != null ) + log.info("Transform: " + label) ; + log.info("\n" + op.toString()) ; + log.info("\n" + op2.toString()) ; } - if ( op2 != op ) - return op2 ; - return op ; + return op2 ; } } diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java index 9a11a479d33..c9ad4083500 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java @@ -42,20 +42,20 @@ static public boolean isLinear(Op _left, Op _right) { // Modifiers that we don't touch // OpSlice, OpTopN, OpOrder (which gets lost - could remove it!) // (These could be first and top - i.e. in call once position, and be safe) - + Op left = effectiveOp(_left) ; Op right = effectiveOp(_right) ; if ( ! isSafeForLinear(left) || ! isSafeForLinear(right) ) return false ; - + // Modifiers. 
if ( right instanceof OpExtend ) return false ; if ( right instanceof OpAssign ) return false ; if ( right instanceof OpGroup ) return false ; // if ( right instanceof OpDiff ) return false ; // if ( right instanceof OpMinus ) return false ; - + if ( right instanceof OpSlice ) return false ; if ( right instanceof OpTopN ) return false ; if ( right instanceof OpOrder ) return false ; @@ -66,7 +66,7 @@ static public boolean isLinear(Op _left, Op _right) { // -- pre check for ops we can't handle in a linear fashion. // These are the negation patterns (minus and diff) - // FILTER NOT EXISTS is safe - it's defined by iteration like the linear execution algorithm. + // FILTER NOT EXISTS is safe - it's defined by iteration like the linear execution algorithm. private static class UnsafeLineraOpException extends RuntimeException {} private static OpVisitor checkForUnsafeVisitor = new OpVisitorBase() { @Override public void visit(OpMinus opMinus) { throw new UnsafeLineraOpException(); } @@ -77,10 +77,11 @@ private static boolean isSafeForLinear(Op op) { catch (UnsafeLineraOpException e) { return false; } } // -- - + // Check left can stream into right static private boolean check(Op leftOp, Op rightOp) { if ( print ) { + System.err.println("== JoinClassifier"); System.err.println("Left::"); System.err.println(leftOp) ; System.err.println("Right::"); @@ -101,7 +102,7 @@ static private boolean check(Op leftOp, Op rightOp) { System.err.println("Right") ; vfRight.print(System.err) ; } - + Set vRightFixed = vfRight.getFixed() ; Set vRightOpt = vfRight.getOpt() ; Set vRightFilter = vfRight.getFilter() ; @@ -126,7 +127,7 @@ static private boolean check(Op leftOp, Op rightOp) { // System.err.println("vRightFilterOnly.not isEmpty"); // return false; } - + // Step 2 : remove any variable definitely fixed from the floating sets // because the nature of the "join" will deal with that. 
vLeftOpt = SetUtils.difference(vLeftOpt, vLeftFixed) ; @@ -168,7 +169,9 @@ static private boolean check(Op leftOp, Op rightOp) { boolean bad1 = r11 || r12 ; if ( print ) - System.err.println("Case 1 = " + bad1) ; + System.err.println("J: Case 1 (false=ok) = " + bad1) ; + if ( bad1 ) + return false; // Case 2 : a filter in the RHS is uses a variable from the LHS (whether // fixed or optional) @@ -179,7 +182,9 @@ static private boolean check(Op leftOp, Op rightOp) { boolean bad2 = SetUtils.intersectionP(vRightFilter, vLeftFixed) ; if ( print ) - System.err.println("Case 2 = " + bad2) ; + System.err.println("J: Case 2 (false=ok) = " + bad2) ; + if ( bad2 ) + return false; // Case 3 : an assign in the RHS uses a variable not introduced // Scoping means we must hide the LHS value from the RHS @@ -189,18 +194,15 @@ static private boolean check(Op leftOp, Op rightOp) { // the RHS // vRightAssign.removeAll(vRightFixed); // boolean bad3 = vRightAssign.size() > 0; - + boolean bad3 = SetUtils.intersectionP(vRightAssign, vLeftFixed) ; if ( print ) - System.err.println("Case 3 = " + bad3) ; + System.err.println("J: Case 3 (false=ok) = " + bad3) ; + if ( bad3 ) + return false; - // Linear if all conditions are false - boolean result = !bad1 && !bad2 && !bad3 ; - - if ( print ) { - System.err.println("Result: "+result) ; - } - return result ; + if ( print ) System.err.println("J: Result: OK"); + return true; } /** Find the "effective op" - i.e. 
the one that may be sensitive to linearization */ diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/LeftJoinClassifier.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/LeftJoinClassifier.java index 7dee439f90d..b94a601315b 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/LeftJoinClassifier.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/LeftJoinClassifier.java @@ -32,20 +32,20 @@ public class LeftJoinClassifier { static /*final*/ public boolean print = false ; - + // Test for the "well-formed" criterion of left joins whereby they can // be executed against a current set of bindings. If not, the left join // has to be done by execution of the left, executing the right without // the left (so no substitution/additional indexing), then // left-join-ed. AND that can be expensive - luckily, it only occurs - // in OPTIONALs with a pattern depth of 2 or more. + // in OPTIONALs with a pattern depth of 2 or more. - // This amounts to testing whether there are any optional variables in the + // This amounts to testing whether there are any optional variables in the // RHS pattern (hence they are nested in someway) that also occur in the LHS // of the LeftJoin being considered. - + // Need also worry about filters in the right (not in the LJ condition) - // which use vars from the left. + // which use vars from the left. static public boolean isLinear(OpLeftJoin op) { return isLinear(op.getLeft(), op.getRight()) ; @@ -54,12 +54,12 @@ static public boolean isLinear(OpLeftJoin op) { static public boolean isLinear(Op left, Op right) { left = effectiveOp(left) ; right = effectiveOp(right) ; - + // Subquery with modifier. Substitution does not apply. // With SELECT *, it's as if the subquery were just the pattern. 
if ( right instanceof OpModifier ) return false ; - + Set leftVars = OpVars.visibleVars(left) ; if ( print ) { System.err.println("Left") ; @@ -84,48 +84,53 @@ static public boolean isLinear(Op left, Op right) { System.err.println("Right") ; vf.print(System.err) ; } - + // Case 1 : If there are any variables in the LHS that are // filter-only or filter-before define, we can't do anything. if ( ! vf.getFilterOnly().isEmpty() ) { // A tigher condition is to see of any of the getFilterOnly are possible from the - // left. If not, then we can still use a sequence. + // left. If not, then we can still use a sequence. // But an outer sequence may push arbitrary here so play safe on the argument // this is a relative uncommon case. - if (print) System.err.println("Case 1 - " + false); + if (print) System.err.println("LJ: Case 1 (true=ok) - " + false); return false ; } - - if (print) System.err.println("Case 1 - " + true); + + if (print) System.err.println("LJ: Case 1 (true=ok) - " + true); Set optRight = vf.getOpt() ; Set fixedRight = vf.getFixed() ; - Set filterVarsRight = vf.getFilter() ; + Set filterVarsRight = vf.getFilter() ; Set assignVarsRight = vf.getAssign() ; // Case 2 // A variable is nested in an optional on the RHS and on the LHS // Cannot linearize as we must preserve scope boolean b2 = SetUtils.intersectionP(leftVars, optRight) ; - if (print) System.err.println("Case 2 - " + b2); - + if (print) System.err.println("LJ: Case 2 (false=ok) - " + b2); + // Case 3 // A variable mentioned in a filter within the RHS already exists on the LHS // Cannot linearize as would change filter evaluation boolean b3 = SetUtils.intersectionP(leftVars, filterVarsRight) ; - if (print) System.err.println("Case 3 - " + b3); - + if (print) System.err.println("LJ: Case 3 (false=ok) - " + b3); + // Case 4 // A variable mentioned in the assign is not introduced on the RHS // Cannot linearize as would change bind evaluation Set unsafeAssign = new HashSet<>(assignVarsRight); 
unsafeAssign.removeAll(fixedRight); boolean b4 = unsafeAssign.size() > 0 ; - if (print) System.err.println("Case 4 - " + b4); + if (print) System.err.println("LJ: Case 4 (false=ok) - " + b4); + + if (print) { + boolean b9 = ! b2 && ! b3 && ! b4 ; + System.err.println("LJ: Case !2&!3&!4 (true=ok) - " + b9); + } // Linear if all conditions are false return ! b2 && ! b3 && ! b4 ; } - - static public Set nonLinearVars(OpLeftJoin op) { + + static public Set nonLinearVars(OpLeftJoin op) { Op left = effectiveOp(op.getLeft()) ; Op right = effectiveOp(op.getRight()) ; Set leftVars = OpVars.visibleVars(left) ; @@ -133,7 +138,7 @@ static public Set nonLinearVars(OpLeftJoin op) { return SetUtils.intersection(leftVars, optRight) ; } - + private static Op effectiveOp(Op op) { if (op instanceof OpExt) op = ((OpExt) op).effectiveOp() ;