Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@
import org.apache.calcite.tools.RelBuilderFactory;


/**
* Rule to expand search condition in filter.
* <p>
* For example, the filter condition:
* <p>
* <code>SEARCH(col1, Sarg[[1..2)])</code>
* </p>
* Is expanded to:
* <p>
* <code>AND(>=(col1, 1), <(col1, 2))</code>
*
*/
Comment on lines +31 to +42
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Boy Scout Rule

public class PinotFilterExpandSearchRule extends RelOptRule {
public static final PinotFilterExpandSearchRule INSTANCE =
new PinotFilterExpandSearchRule(PinotRuleUtils.PINOT_REL_FACTORY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,18 @@ private PinotQueryRuleSets() {
// Expand all SEARCH nodes to simplified filter nodes. SEARCH nodes get created for queries with range
// predicates, in-clauses, etc.
PinotFilterExpandSearchRule.INSTANCE,
// TODO: Here we apply these two rules twice. This is because in some strange situations,
// PinotFilterExpandSearchRule may generate tautologies (like 'a' = 'a') that are not just inefficient, but
// they also produce failures if the tautology reaches the leaf stage.
// We cannot trivially get rid of them using PinotEvaluateLiteralRule or CompileTimeFunctionsInvoker because
// then we find the issue of trying to compare two strings when `=` only supports numbers in V1.
// By applying FILTER_REDUCE_EXPRESSIONS again, we get rid of the tautologies (using Calcite semantics, which
// may produce its own issues, but most of the times it works fine).
// The problem with that is that FILTER_REDUCE_EXPRESSIONS may produce new SEARCH nodes.
// So we apply PinotFilterExpandSearchRule again to expand them. Could this second expansion produce tautologies?
// Yes, but that should only happen in very strange complex queries, so it is at least an improvement.
CoreRules.FILTER_REDUCE_EXPRESSIONS,
PinotFilterExpandSearchRule.INSTANCE,
// add an extra exchange for sort
PinotSortExchangeNodeInsertRule.INSTANCE,
// copy exchanges down, this must be done after SortExchangeNodeInsertRule
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@
import org.apache.pinot.sql.parsers.CalciteSqlParser;
import org.apache.pinot.sql.parsers.SqlNodeAndOptions;
import org.apache.pinot.sql.parsers.parser.SqlPhysicalExplain;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
Expand All @@ -81,6 +83,7 @@
* <p>It provide the higher level entry interface to convert a SQL string into a {@link DispatchableSubPlan}.
*/
public class QueryEnvironment {
private static final Logger LOGGER = LoggerFactory.getLogger(QueryEnvironment.class);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a bunch of logs to better understand whether the tautologies were removed by the validator or the rules. In the cases I mention it is clear it is always the rules, but it was useful to detect other cases where I actually include the tautology in the SQL request

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All logs are in debug mode, so they shouldn't be enabled by default.

// Calcite configurations
private final RelDataTypeFactory _typeFactory;
private final Prepare.CatalogReader _catalogReader;
Expand Down Expand Up @@ -126,7 +129,7 @@ private PlannerContext getPlannerContext() {
public QueryPlannerResult planQuery(String sqlQuery, SqlNodeAndOptions sqlNodeAndOptions, long requestId) {
try (PlannerContext plannerContext = getPlannerContext()) {
plannerContext.setOptions(sqlNodeAndOptions.getOptions());
RelRoot relRoot = compileQuery(sqlNodeAndOptions.getSqlNode(), plannerContext);
RelRoot relRoot = compileQuery(sqlNodeAndOptions.getSqlNode(), plannerContext, requestId);
// TODO: current code only assume one SubPlan per query, but we should support multiple SubPlans per query.
// Each SubPlan should be able to run independently from Broker then set the results into the dependent
// SubPlan for further processing.
Expand Down Expand Up @@ -155,7 +158,7 @@ public QueryPlannerResult explainQuery(String sqlQuery, SqlNodeAndOptions sqlNod
try (PlannerContext plannerContext = getPlannerContext()) {
SqlExplain explain = (SqlExplain) sqlNodeAndOptions.getSqlNode();
plannerContext.setOptions(sqlNodeAndOptions.getOptions());
RelRoot relRoot = compileQuery(explain.getExplicandum(), plannerContext);
RelRoot relRoot = compileQuery(explain.getExplicandum(), plannerContext, requestId);
if (explain instanceof SqlPhysicalExplain) {
// get the physical plan for query.
DispatchableSubPlan dispatchableSubPlan = toDispatchableSubPlan(relRoot, plannerContext, requestId);
Expand Down Expand Up @@ -190,7 +193,7 @@ public List<String> getTableNamesForQuery(String sqlQuery) {
if (sqlNode.getKind().equals(SqlKind.EXPLAIN)) {
sqlNode = ((SqlExplain) sqlNode).getExplicandum();
}
RelRoot relRoot = compileQuery(sqlNode, plannerContext);
RelRoot relRoot = compileQuery(sqlNode, plannerContext, 0);
Set<String> tableNames = RelToPlanNodeConverter.getTableNamesFromRelRoot(relRoot.rel);
return new ArrayList<>(tableNames);
} catch (Throwable t) {
Expand Down Expand Up @@ -230,23 +233,25 @@ public Set<String> getTableNames() {
// steps
// --------------------------------------------------------------------------

private RelRoot compileQuery(SqlNode sqlNode, PlannerContext plannerContext) {
SqlNode validated = validate(sqlNode, plannerContext);
RelRoot relation = toRelation(validated, plannerContext);
RelNode optimized = optimize(relation, plannerContext);
private RelRoot compileQuery(SqlNode sqlNode, PlannerContext plannerContext, long requestId) {
SqlNode validated = validate(sqlNode, plannerContext, requestId);
RelRoot relation = toRelation(validated, plannerContext, requestId);
RelNode optimized = optimize(relation, plannerContext, requestId);
return relation.withRel(optimized);
}

private SqlNode validate(SqlNode sqlNode, PlannerContext plannerContext) {
private SqlNode validate(SqlNode sqlNode, PlannerContext plannerContext, long requestId) {
LOGGER.debug("Validating query {}:\n{}", requestId, sqlNode);
SqlNode validated = plannerContext.getValidator().validate(sqlNode);
if (!validated.getKind().belongsTo(SqlKind.QUERY)) {
throw new IllegalArgumentException("Unsupported SQL query, failed to validate query:\n" + sqlNode);
}
validated.accept(new BytesCastVisitor(plannerContext.getValidator()));
LOGGER.debug("Validated query {}:\n{}", requestId, validated);
return validated;
}

private RelRoot toRelation(SqlNode sqlNode, PlannerContext plannerContext) {
private RelRoot toRelation(SqlNode sqlNode, PlannerContext plannerContext, long requestId) {
RexBuilder rexBuilder = new RexBuilder(_typeFactory);
RelOptCluster cluster = RelOptCluster.create(plannerContext.getRelOptPlanner(), rexBuilder);
SqlToRelConverter converter =
Expand All @@ -272,18 +277,25 @@ private RelRoot toRelation(SqlNode sqlNode, PlannerContext plannerContext) {
} catch (Throwable e) {
throw new RuntimeException("Failed to trim unused fields from query:\n" + RelOptUtil.toString(rootNode), e);
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Converted query to relational expression " + requestId + ":\n" + rootNode.explain());
}
return relRoot.withRel(rootNode);
}

private RelNode optimize(RelRoot relRoot, PlannerContext plannerContext) {
private RelNode optimize(RelRoot relRoot, PlannerContext plannerContext, long requestId) {
// TODO: add support for cost factory
try {
RelOptPlanner optPlanner = plannerContext.getRelOptPlanner();
optPlanner.setRoot(relRoot.rel);
RelNode optimized = optPlanner.findBestExp();
RelOptPlanner traitPlanner = plannerContext.getRelTraitPlanner();
traitPlanner.setRoot(optimized);
return traitPlanner.findBestExp();
RelNode bestExp = traitPlanner.findBestExp();
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Optimized query {}:\n{}", requestId, RelOptUtil.toString(bestExp));
}
return bestExp;
} catch (Throwable e) {
throw new RuntimeException(
"Failed to generate a valid execution plan for query:\n" + RelOptUtil.toString(relRoot.rel), e);
Expand Down
46 changes: 46 additions & 0 deletions pinot-query-planner/src/test/resources/log4j2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--

Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.

-->
<Configuration>

<Properties>
<Property name="LOG_PATTERN">%d{yyyy/MM/dd HH:mm:ss.SSS} %p [%c{1}] [%t] %m%n</Property>
</Properties>
<Appenders>
<Console name="console" target="SYSTEM_OUT">
<PatternLayout pattern="${env:LOG_PATTERN}"/>
</Console>
</Appenders>
<Loggers>
<Root level="info" additivity="false">
<AppenderRef ref="console"/>
</Root>
<AsyncLogger name="org.reflections" level="error" additivity="false"/>
<!-- Tip: Uncomment this in case you need to debug query transformation -->
<!-- <Logger name="org.apache.calcite.plan.RelOptPlanner" level="debug" additivity="false">-->
<!-- Change FULL_PLAN marker from onMatch="DENY" to onMatch='ACCEPT" to see the full plan before and after each
rule that modifies the plan is applied -->
<!-- <MarkerFilter marker="FULL_PLAN" onMatch="ACCEPT" onMismatch="NEUTRAL"/>-->
<!-- <MarkerFilter marker="FULL_PLAN" onMatch="DENY" onMismatch="NEUTRAL"/>-->
<!-- <AppenderRef ref="console"/>-->
<!-- </Logger>-->
</Loggers>
</Configuration>
Original file line number Diff line number Diff line change
Expand Up @@ -259,5 +259,49 @@
"expectedException": ".*No match found for function signature nonExistFun.*"
}
]
},
"literal_planning_cte_test": {
"comment": "Tests for CTEs in literal planning. The SQL parser cannot get rid of expressions that cross CTE, so this is useful to check that the expressions are simplified in the logical plan.",
"queries": [
{
"description": "Simple filter on constants is simplified",
"sql": "EXPLAIN PLAN FOR WITH CTE_B AS (SELECT 1692057600000 AS __ts FROM a GROUP BY __ts) SELECT 1692057600000 AS __ts FROM CTE_B WHERE __ts >= 1692057600000 GROUP BY __ts",
"output": [
"Execution Plan",
"\nLogicalProject(__ts=[1692057600000:BIGINT])",
"\n LogicalAggregate(group=[{0}])",
"\n PinotLogicalExchange(distribution=[hash[0]])",
"\n LogicalAggregate(group=[{0}])",
"\n LogicalProject(__ts=[1692057600000:BIGINT])",
"\n LogicalTableScan(table=[[default, a]])",
"\n"
Comment on lines +268 to +277
Copy link
Contributor Author

@gortiz gortiz May 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This query works in master. Our PinotEvaluateLiteralRule.Filter.INSTANCE is able to get rid of it.

But for example a query like:

WITH CTE_B AS (
  SELECT 'a' AS __ts FROM a GROUP BY __ts
) SELECT 1 
FROM CTE_B WHERE __ts >= `b`

Fails because PinotEvaluateLiteralRule.Filter.INSTANCE tries to apply >= in V1, which only works with numbers!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should no longer be an issue (#13711).

]
},
{
"description": "And filter on constants is simplified",
"sql": "EXPLAIN PLAN FOR WITH CTE_B AS (SELECT 1692057600000 AS __ts FROM a GROUP BY __ts) SELECT 1692057600000 AS __ts FROM CTE_B WHERE __ts >= 1692057600000 AND __ts < 1693267200000 GROUP BY __ts",
"output": [
"Execution Plan",
"\nLogicalProject(__ts=[1692057600000:BIGINT])",
"\n LogicalAggregate(group=[{0}])",
"\n PinotLogicalExchange(distribution=[hash[0]])",
"\n LogicalAggregate(group=[{0}])",
"\n LogicalProject(__ts=[1692057600000:BIGINT])",
"\n LogicalTableScan(table=[[default, a]])",
"\n"
Comment on lines +282 to +291
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is similar to the query in the PR description. It can be fixed by applying PinotEvaluateLiteralRule after PinotFilterExpandSearchRule or adding CompileTimeFunctionsInvoker into ServerPlanRequestUtils.QUERY_REWRITERS.

]
},
{
"description": "Search + OR filter on constants is simplified",
"sql": "EXPLAIN PLAN FOR WITH tmp2 AS (SELECT CASE WHEN col2 = 'VAL1' THEN 'A' ELSE col2 END AS cased FROM a) SELECT 1 FROM tmp2 WHERE ((cased = 'B') OR (cased = 'A'))",
"output": [
"Execution Plan",
"\nLogicalProject(EXPR$0=[1])",
"\n LogicalFilter(condition=[OR(=($1, _UTF-8'A'), =($1, _UTF-8'B'), =($1, _UTF-8'VAL1'))])",
"\n LogicalTableScan(table=[[default, a]])",
"\n"
]
}
]
}
}