-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Hack to fix some problematic queries #13174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,6 +73,8 @@ | |
| import org.apache.pinot.sql.parsers.CalciteSqlParser; | ||
| import org.apache.pinot.sql.parsers.SqlNodeAndOptions; | ||
| import org.apache.pinot.sql.parsers.parser.SqlPhysicalExplain; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
|
|
||
| /** | ||
|
|
@@ -81,6 +83,7 @@ | |
| * <p>It provide the higher level entry interface to convert a SQL string into a {@link DispatchableSubPlan}. | ||
| */ | ||
| public class QueryEnvironment { | ||
| private static final Logger LOGGER = LoggerFactory.getLogger(QueryEnvironment.class); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added a bunch of logs to better understand whether the tautologies were removed by the validator or the rules. In the cases I mention it is clear it is always the rules, but it was useful to detect other cases where I actually include the tautology in the SQL request
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All logs are in debug mode, so they shouldn't be enabled by default. |
||
| // Calcite configurations | ||
| private final RelDataTypeFactory _typeFactory; | ||
| private final Prepare.CatalogReader _catalogReader; | ||
|
|
@@ -126,7 +129,7 @@ private PlannerContext getPlannerContext() { | |
| public QueryPlannerResult planQuery(String sqlQuery, SqlNodeAndOptions sqlNodeAndOptions, long requestId) { | ||
| try (PlannerContext plannerContext = getPlannerContext()) { | ||
| plannerContext.setOptions(sqlNodeAndOptions.getOptions()); | ||
| RelRoot relRoot = compileQuery(sqlNodeAndOptions.getSqlNode(), plannerContext); | ||
| RelRoot relRoot = compileQuery(sqlNodeAndOptions.getSqlNode(), plannerContext, requestId); | ||
| // TODO: current code only assume one SubPlan per query, but we should support multiple SubPlans per query. | ||
| // Each SubPlan should be able to run independently from Broker then set the results into the dependent | ||
| // SubPlan for further processing. | ||
|
|
@@ -155,7 +158,7 @@ public QueryPlannerResult explainQuery(String sqlQuery, SqlNodeAndOptions sqlNod | |
| try (PlannerContext plannerContext = getPlannerContext()) { | ||
| SqlExplain explain = (SqlExplain) sqlNodeAndOptions.getSqlNode(); | ||
| plannerContext.setOptions(sqlNodeAndOptions.getOptions()); | ||
| RelRoot relRoot = compileQuery(explain.getExplicandum(), plannerContext); | ||
| RelRoot relRoot = compileQuery(explain.getExplicandum(), plannerContext, requestId); | ||
| if (explain instanceof SqlPhysicalExplain) { | ||
| // get the physical plan for query. | ||
| DispatchableSubPlan dispatchableSubPlan = toDispatchableSubPlan(relRoot, plannerContext, requestId); | ||
|
|
@@ -190,7 +193,7 @@ public List<String> getTableNamesForQuery(String sqlQuery) { | |
| if (sqlNode.getKind().equals(SqlKind.EXPLAIN)) { | ||
| sqlNode = ((SqlExplain) sqlNode).getExplicandum(); | ||
| } | ||
| RelRoot relRoot = compileQuery(sqlNode, plannerContext); | ||
| RelRoot relRoot = compileQuery(sqlNode, plannerContext, 0); | ||
| Set<String> tableNames = RelToPlanNodeConverter.getTableNamesFromRelRoot(relRoot.rel); | ||
| return new ArrayList<>(tableNames); | ||
| } catch (Throwable t) { | ||
|
|
@@ -230,23 +233,25 @@ public Set<String> getTableNames() { | |
| // steps | ||
| // -------------------------------------------------------------------------- | ||
|
|
||
| private RelRoot compileQuery(SqlNode sqlNode, PlannerContext plannerContext) { | ||
| SqlNode validated = validate(sqlNode, plannerContext); | ||
| RelRoot relation = toRelation(validated, plannerContext); | ||
| RelNode optimized = optimize(relation, plannerContext); | ||
| private RelRoot compileQuery(SqlNode sqlNode, PlannerContext plannerContext, long requestId) { | ||
| SqlNode validated = validate(sqlNode, plannerContext, requestId); | ||
| RelRoot relation = toRelation(validated, plannerContext, requestId); | ||
| RelNode optimized = optimize(relation, plannerContext, requestId); | ||
| return relation.withRel(optimized); | ||
| } | ||
|
|
||
| private SqlNode validate(SqlNode sqlNode, PlannerContext plannerContext) { | ||
| private SqlNode validate(SqlNode sqlNode, PlannerContext plannerContext, long requestId) { | ||
| LOGGER.debug("Validating query {}:\n{}", requestId, sqlNode); | ||
| SqlNode validated = plannerContext.getValidator().validate(sqlNode); | ||
| if (!validated.getKind().belongsTo(SqlKind.QUERY)) { | ||
| throw new IllegalArgumentException("Unsupported SQL query, failed to validate query:\n" + sqlNode); | ||
| } | ||
| validated.accept(new BytesCastVisitor(plannerContext.getValidator())); | ||
| LOGGER.debug("Validated query {}:\n{}", requestId, validated); | ||
| return validated; | ||
| } | ||
|
|
||
| private RelRoot toRelation(SqlNode sqlNode, PlannerContext plannerContext) { | ||
| private RelRoot toRelation(SqlNode sqlNode, PlannerContext plannerContext, long requestId) { | ||
| RexBuilder rexBuilder = new RexBuilder(_typeFactory); | ||
| RelOptCluster cluster = RelOptCluster.create(plannerContext.getRelOptPlanner(), rexBuilder); | ||
| SqlToRelConverter converter = | ||
|
|
@@ -272,18 +277,25 @@ private RelRoot toRelation(SqlNode sqlNode, PlannerContext plannerContext) { | |
| } catch (Throwable e) { | ||
| throw new RuntimeException("Failed to trim unused fields from query:\n" + RelOptUtil.toString(rootNode), e); | ||
| } | ||
| if (LOGGER.isDebugEnabled()) { | ||
| LOGGER.debug("Converted query to relational expression " + requestId + ":\n" + rootNode.explain()); | ||
| } | ||
| return relRoot.withRel(rootNode); | ||
| } | ||
|
|
||
| private RelNode optimize(RelRoot relRoot, PlannerContext plannerContext) { | ||
| private RelNode optimize(RelRoot relRoot, PlannerContext plannerContext, long requestId) { | ||
| // TODO: add support for cost factory | ||
| try { | ||
| RelOptPlanner optPlanner = plannerContext.getRelOptPlanner(); | ||
| optPlanner.setRoot(relRoot.rel); | ||
| RelNode optimized = optPlanner.findBestExp(); | ||
| RelOptPlanner traitPlanner = plannerContext.getRelTraitPlanner(); | ||
| traitPlanner.setRoot(optimized); | ||
| return traitPlanner.findBestExp(); | ||
| RelNode bestExp = traitPlanner.findBestExp(); | ||
| if (LOGGER.isDebugEnabled()) { | ||
| LOGGER.debug("Optimized query {}:\n{}", requestId, RelOptUtil.toString(bestExp)); | ||
| } | ||
| return bestExp; | ||
| } catch (Throwable e) { | ||
| throw new RuntimeException( | ||
| "Failed to generate a valid execution plan for query:\n" + RelOptUtil.toString(relRoot.rel), e); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <!-- | ||
|
|
||
| Licensed to the Apache Software Foundation (ASF) under one | ||
| or more contributor license agreements. See the NOTICE file | ||
| distributed with this work for additional information | ||
| regarding copyright ownership. The ASF licenses this file | ||
| to you under the Apache License, Version 2.0 (the | ||
| "License"); you may not use this file except in compliance | ||
| with the License. You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, | ||
| software distributed under the License is distributed on an | ||
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| KIND, either express or implied. See the License for the | ||
| specific language governing permissions and limitations | ||
| under the License. | ||
|
|
||
| --> | ||
| <Configuration> | ||
|
|
||
| <Properties> | ||
| <Property name="LOG_PATTERN">%d{yyyy/MM/dd HH:mm:ss.SSS} %p [%c{1}] [%t] %m%n</Property> | ||
| </Properties> | ||
| <Appenders> | ||
| <Console name="console" target="SYSTEM_OUT"> | ||
| <PatternLayout pattern="${env:LOG_PATTERN}"/> | ||
| </Console> | ||
| </Appenders> | ||
| <Loggers> | ||
| <Root level="info" additivity="false"> | ||
| <AppenderRef ref="console"/> | ||
| </Root> | ||
| <AsyncLogger name="org.reflections" level="error" additivity="false"/> | ||
| <!-- Tip: Uncomment this in case you need to debug query transformation --> | ||
| <!-- <Logger name="org.apache.calcite.plan.RelOptPlanner" level="debug" additivity="false">--> | ||
| <!-- Change FULL_PLAN marker from onMatch="DENY" to onMatch='ACCEPT" to see the full plan before and after each | ||
| rule that modifies the plan is applied --> | ||
| <!-- <MarkerFilter marker="FULL_PLAN" onMatch="ACCEPT" onMismatch="NEUTRAL"/>--> | ||
| <!-- <MarkerFilter marker="FULL_PLAN" onMatch="DENY" onMismatch="NEUTRAL"/>--> | ||
| <!-- <AppenderRef ref="console"/>--> | ||
| <!-- </Logger>--> | ||
| </Loggers> | ||
| </Configuration> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -259,5 +259,49 @@ | |
| "expectedException": ".*No match found for function signature nonExistFun.*" | ||
| } | ||
| ] | ||
| }, | ||
| "literal_planning_cte_test": { | ||
| "comment": "Tests for CTEs in literal planning. The SQL parser cannot get rid of expressions that cross CTE, so this is useful to check that the expressions are simplified in the logical plan.", | ||
| "queries": [ | ||
| { | ||
| "description": "Simple filter on constants is simplified", | ||
| "sql": "EXPLAIN PLAN FOR WITH CTE_B AS (SELECT 1692057600000 AS __ts FROM a GROUP BY __ts) SELECT 1692057600000 AS __ts FROM CTE_B WHERE __ts >= 1692057600000 GROUP BY __ts", | ||
| "output": [ | ||
| "Execution Plan", | ||
| "\nLogicalProject(__ts=[1692057600000:BIGINT])", | ||
| "\n LogicalAggregate(group=[{0}])", | ||
| "\n PinotLogicalExchange(distribution=[hash[0]])", | ||
| "\n LogicalAggregate(group=[{0}])", | ||
| "\n LogicalProject(__ts=[1692057600000:BIGINT])", | ||
| "\n LogicalTableScan(table=[[default, a]])", | ||
| "\n" | ||
|
Comment on lines
+268
to
+277
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This query works in master. Our But for example a query like: WITH CTE_B AS (
SELECT 'a' AS __ts FROM a GROUP BY __ts
) SELECT 1
FROM CTE_B WHERE __ts >= `b`Fails because
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should no longer be an issue (#13711). |
||
| ] | ||
| }, | ||
| { | ||
| "description": "And filter on constants is simplified", | ||
| "sql": "EXPLAIN PLAN FOR WITH CTE_B AS (SELECT 1692057600000 AS __ts FROM a GROUP BY __ts) SELECT 1692057600000 AS __ts FROM CTE_B WHERE __ts >= 1692057600000 AND __ts < 1693267200000 GROUP BY __ts", | ||
| "output": [ | ||
| "Execution Plan", | ||
| "\nLogicalProject(__ts=[1692057600000:BIGINT])", | ||
| "\n LogicalAggregate(group=[{0}])", | ||
| "\n PinotLogicalExchange(distribution=[hash[0]])", | ||
| "\n LogicalAggregate(group=[{0}])", | ||
| "\n LogicalProject(__ts=[1692057600000:BIGINT])", | ||
| "\n LogicalTableScan(table=[[default, a]])", | ||
| "\n" | ||
|
Comment on lines
+282
to
+291
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is similar to the query in the PR description. It can be fixed by applying |
||
| ] | ||
| }, | ||
| { | ||
| "description": "Search + OR filter on constants is simplified", | ||
| "sql": "EXPLAIN PLAN FOR WITH tmp2 AS (SELECT CASE WHEN col2 = 'VAL1' THEN 'A' ELSE col2 END AS cased FROM a) SELECT 1 FROM tmp2 WHERE ((cased = 'B') OR (cased = 'A'))", | ||
| "output": [ | ||
| "Execution Plan", | ||
| "\nLogicalProject(EXPR$0=[1])", | ||
| "\n LogicalFilter(condition=[OR(=($1, _UTF-8'A'), =($1, _UTF-8'B'), =($1, _UTF-8'VAL1'))])", | ||
| "\n LogicalTableScan(table=[[default, a]])", | ||
| "\n" | ||
| ] | ||
| } | ||
| ] | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Boy Scout Rule