Skip to content
Permalink
Browse files
[ASTERIXDB-3008][COMP] Improve translation of inner joins in subplans
- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Fix performance regression caused by ASTERIXDB-3006
- Fix incorrect free variable computation by
  FreeVariableVisitor for join clauses

Change-Id: I1f9d0f453202ec79673f2f66b9034fbc6047212b
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15127
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
  • Loading branch information
Dmitry Lychagin committed Feb 3, 2022
1 parent e4fde26 commit 5588b7974f4b5d25025aa80ed44b0608fe08b312
Showing 7 changed files with 377 additions and 75 deletions.
@@ -18,11 +18,9 @@
*/
package org.apache.asterix.translator;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -125,6 +123,7 @@
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterUnnestOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
@@ -155,7 +154,6 @@ public class SqlppExpressionToPlanTranslator extends LangExpressionToPlanTransla
public static final String REWRITE_IN_AS_OR_OPTION = "rewrite_in_as_or";
private static final boolean REWRITE_IN_AS_OR_OPTION_DEFAULT = true;

private Deque<Mutable<ILogicalOperator>> uncorrelatedRightBranchStack = new ArrayDeque<>();
private final Map<VarIdentifier, IAObject> externalVars;
private final boolean translateInAsOr;

@@ -303,12 +301,10 @@ public Pair<ILogicalOperator, LogicalVariable> visit(FromClause fromClause, Muta
throws CompilationException {
Mutable<ILogicalOperator> inputSrc = arg;
Pair<ILogicalOperator, LogicalVariable> topUnnest = null;
uncorrelatedRightBranchStack.push(inputSrc);
for (FromTerm fromTerm : fromClause.getFromTerms()) {
topUnnest = fromTerm.accept(this, inputSrc);
inputSrc = new MutableObject<>(topUnnest.first);
}
uncorrelatedRightBranchStack.pop();
return topUnnest;
}

@@ -345,8 +341,10 @@ public Pair<ILogicalOperator, LogicalVariable> visit(FromTerm fromTerm, Mutable<
public Pair<ILogicalOperator, LogicalVariable> visit(JoinClause joinClause, Mutable<ILogicalOperator> leftInputRef)
throws CompilationException {
SourceLocation sourceLoc = joinClause.getSourceLocation();
if (joinClause.getJoinType() == JoinType.INNER && !context.inSubplan()) {
Mutable<ILogicalOperator> rightInputRef = uncorrelatedRightBranchStack.peek();
if (joinClause.getJoinType() == JoinType.INNER && !hasFreeVariables(joinClause.getRightExpression())) {
EmptyTupleSourceOperator ets = new EmptyTupleSourceOperator();
ets.setSourceLocation(joinClause.getSourceLocation());
Mutable<ILogicalOperator> rightInputRef = new MutableObject<>(ets);
Pair<ILogicalOperator, LogicalVariable> rightBranch =
generateUnnestForBinaryCorrelateRightBranch(joinClause, rightInputRef, false, null);
// A join operator with condition TRUE.
@@ -509,6 +507,16 @@ public Pair<ILogicalOperator, LogicalVariable> visit(JoinClause joinClause, Muta
}
}

private boolean hasFreeVariables(Expression expr) throws CompilationException {
Set<VariableExpr> freeVars = SqlppRewriteUtil.getFreeVariable(expr);
for (VariableExpr varRef : freeVars) {
if (!SqlppVariableUtil.isExternalVariableReference(varRef)) {
return true;
}
}
return false;
}

private static IAlgebricksConstantValue translateLeftOuterMissingValue(Literal.Type type)
throws CompilationException {
switch (type) {
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*
* Test plan for CH2 Q8
*/

drop dataverse test if exists;
create dataverse test;
use test;

create dataset stock(id uuid not unknown) open type primary key `id` autogenerated;
create dataset orders(id uuid not unknown) open type primary key `id` autogenerated;
create dataset customer(id uuid not unknown) open type primary key `id` autogenerated;
create dataset nation(id uuid not unknown) open type primary key `id` autogenerated;
create dataset supplier(id uuid not unknown) open type primary key `id` autogenerated;
create dataset item(id uuid not unknown) open type primary key `id` autogenerated;
create dataset region(id uuid not unknown) open type primary key `id` autogenerated;

SELECT
GET_YEAR(DATE(rn1coolis.o_entry_d)) AS l_year,
ROUND((SUM(CASE WHEN sun2.n_name = 'Germany' THEN rn1coolis.ol_amount ELSE 0 END) / SUM(rn1coolis.ol_amount)),2)
AS mkt_share
FROM (
SELECT rn1cooli.o_entry_d, rn1cooli.ol_amount, s.s_w_id, s.s_i_id
FROM stock s
JOIN (
SELECT o.o_entry_d, ol.ol_i_id, ol.ol_amount, ol.ol_supply_w_id
FROM orders o, o.o_orderline ol, item i
JOIN (
SELECT c.c_id,c.c_w_id, c.c_d_id
FROM customer c
JOIN (
SELECT n1.n_nationkey
FROM nation n1, region r
WHERE n1.n_regionkey = r.r_regionkey AND r.r_name = 'Europe'
) nr ON nr.n_nationkey = string_to_codepoint(c.c_state)[0]
) cnr ON cnr.c_id = o.o_c_id
AND cnr.c_w_id = o.o_w_id AND cnr.c_d_id = o.o_d_id AND i.i_data LIKE '%b'
AND i.i_id = ol.ol_i_id AND ol.ol_i_id < 1000
AND o.o_entry_d BETWEEN '2017-01-01 00:00:00.000000' AND '2018-12-31 00:00:00.000000'
) rn1cooli ON rn1cooli.ol_i_id = s.s_i_id AND rn1cooli.ol_supply_w_id = s.s_w_id
) rn1coolis
JOIN (
SELECT su.su_suppkey, n2.n_name
FROM supplier su, nation n2
WHERE su.su_nationkey = n2.n_nationkey
) sun2 ON rn1coolis.s_w_id * rn1coolis.s_i_id MOD 10000 = sun2.su_suppkey
GROUP BY get_year(date(rn1coolis.o_entry_d))
ORDER BY l_year;
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*
* Test plan when right branch of an inner join uses an outer variable.
* Currently this results in NL join
*/

drop dataverse test if exists;
create dataverse test;
use test;

create dataset t1(id uuid not unknown) open type primary key id autogenerated;
create dataset t2(id uuid not unknown) open type primary key id autogenerated;

select a
from t1
let a = (select value count(*) from t2 join t1.x as z on t2.y = z.b );
@@ -0,0 +1,128 @@
-- DISTRIBUTE_RESULT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- SORT_MERGE_EXCHANGE [$#1(ASC) ] |PARTITIONED|
-- SORT_GROUP_BY[$$333] |PARTITIONED|
{
-- AGGREGATE |LOCAL|
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- HASH_PARTITION_EXCHANGE [$$333] |PARTITIONED|
-- SORT_GROUP_BY[$$278] |PARTITIONED|
{
-- AGGREGATE |LOCAL|
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- HYBRID_HASH_JOIN [$$304][$$325] |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$304] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- HYBRID_HASH_JOIN [$$280, $$279][$$290, $$320] |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$280, $$279] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.stock) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$290, $$320] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- HYBRID_HASH_JOIN [$$297, $$299, $$301][$$317, $$318, $$316] |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$297, $$299, $$301] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- HYBRID_HASH_JOIN [$$290][$$308] |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$290] |PARTITIONED|
-- STREAM_SELECT |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- UNNEST |PARTITIONED|
-- STREAM_SELECT |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.orders) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$308] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- STREAM_SELECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.item) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$317, $$318, $$316] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- HYBRID_HASH_JOIN [$$295][$$315] |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$295] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.customer) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$315] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- HYBRID_HASH_JOIN [$$292][$$293] |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$292] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- REPLICATE |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.nation) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$293] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- STREAM_SELECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.region) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$325] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- HYBRID_HASH_JOIN [$$309][$$310] |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$309] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.supplier) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
-- HASH_PARTITION_EXCHANGE [$$310] |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ASSIGN |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- REPLICATE |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- STREAM_PROJECT |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- DATASOURCE_SCAN (test.nation) |PARTITIONED|
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
-- EMPTY_TUPLE_SOURCE |PARTITIONED|

0 comments on commit 5588b79

Please sign in to comment.