Skip to content

Commit

Permalink
SQL: Disallow queries with inner LIMIT that cannot be executed in ES (#…
Browse files Browse the repository at this point in the history
…75960) (#76222)

* Disallow queries with inner limits that cannot be executed correctly

* address review comments

* introduce PushDownAndCombineOrderBy optimization

* Revert "introduce PushDownAndCombineOrderBy optimization"

8d8d992

* use replaceChild

Co-authored-by: Lukas Wegmann <lukas.wegmann@elastic.co>
  • Loading branch information
elasticsearchmachine and Lukas Wegmann committed Aug 9, 2021
1 parent 5a24b67 commit 3f421ba
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,10 @@ selectOrderByOrderByOrderByLimit
SELECT * FROM (SELECT * FROM (SELECT * FROM test_emp ORDER BY emp_no DESC) ORDER BY emp_no ASC) ORDER BY emp_no DESC LIMIT 5;
selectOrderByOrderByOrderByLimitLimit
SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM test_emp ORDER BY emp_no DESC) ORDER BY emp_no ASC) ORDER BY emp_no DESC LIMIT 12) LIMIT 6;
selectOrderByLimitSameOrderBy
SELECT * FROM (SELECT * FROM (SELECT * FROM test_emp LIMIT 10) ORDER BY emp_no) ORDER BY emp_no LIMIT 5;
selectGroupByOrderByLimit
SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp GROUP BY languages) ORDER BY max DESC LIMIT 3;
selectGroupByOrderByLimitNulls
SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp GROUP BY languages) ORDER BY max DESC NULLS FIRST LIMIT 3;
selectGroupByLimitOrderByLimit
SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp WHERE languages IS NOT NULL GROUP BY languages LIMIT 5) ORDER BY max DESC LIMIT 3;
selectGroupByOrderByOrderByLimit
SELECT * FROM (SELECT max(salary) AS max, languages FROM test_emp GROUP BY languages ORDER BY max ASC) ORDER BY max DESC NULLS FIRST LIMIT 4;
selectGroupByOrderByOrderByLimitNulls
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,16 @@ public LogicalPlan visitQueryNoWith(QueryNoWithContext ctx) {
if (ctx.orderBy().isEmpty() == false) {
List<OrderByContext> orders = ctx.orderBy();
OrderByContext endContext = orders.get(orders.size() - 1);
plan = new OrderBy(source(ctx.ORDER(), endContext), plan, visitList(ctx.orderBy(), Order.class));
Source source = source(ctx.ORDER(), endContext);
List<Order> order = visitList(ctx.orderBy(), Order.class);

if (plan instanceof Limit) {
// Limit from TOP clauses must be the parent of the OrderBy clause
Limit limit = (Limit) plan;
plan = limit.replaceChild(new OrderBy(source, limit.child(), order));
} else {
plan = new OrderBy(source, plan, order);
}
}

LimitClauseContext limitClause = ctx.limitClause();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
package org.elasticsearch.xpack.sql.planner;

import org.elasticsearch.xpack.ql.common.Failure;
import org.elasticsearch.xpack.ql.expression.Order;
import org.elasticsearch.xpack.ql.util.Holder;
import org.elasticsearch.xpack.sql.plan.physical.AggregateExec;
import org.elasticsearch.xpack.sql.plan.physical.FilterExec;
import org.elasticsearch.xpack.sql.plan.physical.LimitExec;
import org.elasticsearch.xpack.sql.plan.physical.OrderExec;
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
import org.elasticsearch.xpack.sql.plan.physical.PivotExec;
import org.elasticsearch.xpack.sql.plan.physical.UnaryExec;
import org.elasticsearch.xpack.sql.plan.physical.Unexecutable;
import org.elasticsearch.xpack.sql.plan.physical.UnplannedExec;

Expand Down Expand Up @@ -59,20 +62,23 @@ static List<Failure> verifyExecutingPlan(PhysicalPlan plan) {
}

private static void checkForNonCollapsableSubselects(PhysicalPlan plan, List<Failure> failures) {
Holder<Boolean> hasLimit = new Holder<>(Boolean.FALSE);
Holder<List<Order>> orderBy = new Holder<>();
Holder<LimitExec> limit = new Holder<>();
Holder<UnaryExec> limitedExec = new Holder<>();

plan.forEachUp(p -> {
if (hasLimit.get() == false && p instanceof LimitExec) {
hasLimit.set(Boolean.TRUE);
return;
}
if (p instanceof OrderExec) {
if (hasLimit.get() && orderBy.get() != null && ((OrderExec) p).order().equals(orderBy.get()) == false) {
failures.add(fail(p, "Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT"));
} else {
orderBy.set(((OrderExec) p).order());
if (limit.get() == null && p instanceof LimitExec) {
limit.set((LimitExec) p);
} else if (limit.get() != null && limitedExec.get() == null) {
if (p instanceof OrderExec || p instanceof FilterExec || p instanceof PivotExec || p instanceof AggregateExec) {
limitedExec.set((UnaryExec) p);
}
}
});

if (limitedExec.get() != null) {
failures.add(
fail(limit.get(), "LIMIT or TOP cannot be used in a subquery if outer query contains GROUP BY, ORDER BY, PIVOT or WHERE")
);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,14 @@ public void testTop() {
public void testUseBothTopAndLimitInvalid() {
ParsingException e = expectThrows(ParsingException.class, () -> parseStatement("SELECT TOP 10 * FROM test LIMIT 20"));
assertEquals("line 1:28: TOP and LIMIT are not allowed in the same query - use one or the other", e.getMessage());

e = expectThrows(ParsingException.class,
() -> parseStatement("SELECT TOP 30 a, count(*) cnt FROM test WHERE b = 20 GROUP BY a HAVING cnt > 10 LIMIT 40"));
assertEquals("line 1:82: TOP and LIMIT are not allowed in the same query - use one or the other", e.getMessage());

e = expectThrows(ParsingException.class,
() -> parseStatement("SELECT TOP 30 * FROM test ORDER BY a LIMIT 40"));
assertEquals("line 1:39: TOP and LIMIT are not allowed in the same query - use one or the other", e.getMessage());
}

public void testsSelectNonReservedKeywords() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import org.elasticsearch.xpack.sql.analysis.analyzer.Verifier;
import org.elasticsearch.xpack.sql.expression.function.SqlFunctionRegistry;
import org.elasticsearch.xpack.sql.parser.SqlParser;
import org.elasticsearch.xpack.sql.plan.physical.EsQueryExec;
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
import org.elasticsearch.xpack.sql.stats.Metrics;

import static org.elasticsearch.xpack.sql.SqlTestUtils.TEST_CFG;
Expand All @@ -33,10 +35,14 @@ public class VerifierTests extends ESTestCase {
);
private final Planner planner = new Planner();

private PhysicalPlan verify(String sql) {
return planner.plan(analyzer.analyze(parser.createStatement(sql), true), true);
}

private String error(String sql) {
PlanningException e = expectThrows(
PlanningException.class,
() -> planner.plan(analyzer.analyze(parser.createStatement(sql), true), true)
() -> verify(sql)
);
String message = e.getMessage();
assertTrue(message.startsWith("Found "));
Expand All @@ -45,21 +51,28 @@ private String error(String sql) {
return message.substring(index + pattern.length());
}

private String innerLimitMsg(int line, int column) {
return line
+ ":"
+ column
+ ": LIMIT or TOP cannot be used in a subquery if outer query contains GROUP BY, ORDER BY, PIVOT or WHERE";
}

public void testSubselectWithOrderByOnTopOfOrderByAndLimit() {
assertEquals(
"1:60: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT",
innerLimitMsg(1, 50),
error("SELECT * FROM (SELECT * FROM test ORDER BY 1 ASC LIMIT 10) ORDER BY 2")
);
assertEquals(
"1:72: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT",
innerLimitMsg(1, 50),
error("SELECT * FROM (SELECT * FROM (SELECT * FROM test LIMIT 10) ORDER BY 1) ORDER BY 2")
);
assertEquals(
"1:75: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT",
innerLimitMsg(1, 66),
error("SELECT * FROM (SELECT * FROM (SELECT * FROM test ORDER BY 1 ASC) LIMIT 5) ORDER BY 1 DESC")
);
assertEquals(
"1:152: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT",
innerLimitMsg(1, 142),
error("SELECT * FROM (" +
"SELECT * FROM (" +
"SELECT * FROM (" +
Expand All @@ -68,17 +81,21 @@ public void testSubselectWithOrderByOnTopOfOrderByAndLimit() {
"ORDER BY int DESC NULLS LAST LIMIT 12) " +
"ORDER BY int DESC NULLS FIRST")
);
assertEquals(
innerLimitMsg(1, 50),
error("SELECT * FROM (SELECT * FROM (SELECT * FROM test LIMIT 10) ORDER BY 1 LIMIT 20) ORDER BY 2")
);
}

public void testSubselectWithOrderByOnTopOfGroupByOrderByAndLimit() {
assertEquals(
"1:96: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT",
innerLimitMsg(1, 86),
error(
"SELECT * FROM (SELECT max(int) AS max, bool FROM test GROUP BY bool ORDER BY max ASC LIMIT 10) ORDER BY max DESC"
)
);
assertEquals(
"1:112: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT",
innerLimitMsg(1, 102),
error(
"SELECT * FROM ("
+ "SELECT * FROM ("
Expand All @@ -88,7 +105,7 @@ public void testSubselectWithOrderByOnTopOfGroupByOrderByAndLimit() {
)
);
assertEquals(
"1:186: Cannot use ORDER BY on top of a subquery with ORDER BY and LIMIT",
innerLimitMsg(1, 176),
error("SELECT * FROM (" +
"SELECT * FROM (" +
"SELECT * FROM (" +
Expand All @@ -98,4 +115,41 @@ public void testSubselectWithOrderByOnTopOfGroupByOrderByAndLimit() {
"ORDER BY max DESC NULLS FIRST")
);
}

public void testInnerLimitWithWhere() {
assertEquals(innerLimitMsg(1, 35),
error("SELECT * FROM (SELECT * FROM test LIMIT 10) WHERE int = 1"));
assertEquals(innerLimitMsg(1, 50),
error("SELECT * FROM (SELECT * FROM (SELECT * FROM test LIMIT 10)) WHERE int = 1"));
assertEquals(innerLimitMsg(1, 51),
error("SELECT * FROM (SELECT * FROM (SELECT * FROM test) LIMIT 10) WHERE int = 1"));
}

public void testInnerLimitWithGroupBy() {
assertEquals(innerLimitMsg(1, 37),
error("SELECT int FROM (SELECT * FROM test LIMIT 10) GROUP BY int"));
assertEquals(innerLimitMsg(1, 52),
error("SELECT int FROM (SELECT * FROM (SELECT * FROM test LIMIT 10)) GROUP BY int"));
assertEquals(innerLimitMsg(1, 53),
error("SELECT int FROM (SELECT * FROM (SELECT * FROM test) LIMIT 10) GROUP BY int"));
}

public void testInnerLimitWithPivot() {
assertEquals(innerLimitMsg(1, 52),
error("SELECT * FROM (SELECT int, bool, keyword FROM test LIMIT 10) PIVOT (AVG(int) FOR bool IN (true, false))"));
}

public void testTopWithOrderBySucceeds() {
PhysicalPlan plan = verify("SELECT TOP 5 * FROM test ORDER BY int");
assertEquals(EsQueryExec.class, plan.getClass());
}

public void testInnerTop() {
assertEquals(innerLimitMsg(1, 23),
error("SELECT * FROM (SELECT TOP 10 * FROM test) WHERE int = 1"));
assertEquals(innerLimitMsg(1, 23),
error("SELECT * FROM (SELECT TOP 10 * FROM test) ORDER BY int"));
assertEquals(innerLimitMsg(1, 23),
error("SELECT * FROM (SELECT TOP 10 int, bool, keyword FROM test) PIVOT (AVG(int) FOR bool IN (true, false))"));
}
}

0 comments on commit 3f421ba

Please sign in to comment.