+
- 300 ]]>
+ 300]]>
($6, 300))])
- +- LogicalJoin(condition=[=($0, $7)], joinType=[inner])
- :- LogicalAggregate(group=[{0, 1}], EXPR$2=[SUM($2)])
- : +- LogicalProject(fact_date_sk=[$4], amount=[$6], price=[$3])
- : +- LogicalJoin(condition=[=($0, $5)], joinType=[inner])
- : :- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
- : +- LogicalTableScan(table=[[testCatalog, test_database, item]])
+LogicalProject(id=[$0], name=[$1], amount=[$2], price=[$3], fact_date_sk=[$4], id0=[$5], amount0=[$6], price0=[$7], id1=[$8], male=[$9], amount1=[$10], price1=[$11], dim_date_sk=[$12])
++- LogicalFilter(condition=[AND(=($4, $12), =($0, $5), =($8, $5), <($11, 500), >($11, 300))])
+ +- LogicalJoin(condition=[true], joinType=[inner])
+ :- LogicalJoin(condition=[true], joinType=[inner])
+ : :- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
+ : +- LogicalTableScan(table=[[testCatalog, test_database, item]])
+ +- LogicalTableScan(table=[[testCatalog, test_database, dim]])
]]>
From 1b52b01f726206fe54d186ded75acc176ccda599 Mon Sep 17 00:00:00 2001
From: "zhengyunhong.zyh" <337361684@qq.com>
Date: Thu, 29 Dec 2022 13:59:35 +0800
Subject: [PATCH 3/5] Some minor changes 2
---
.../program/FlinkDynamicPartitionPruningProgram.java | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java
index 52590d32731ad..25311764442ef 100644
--- a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java
+++ b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java
@@ -64,6 +64,13 @@
* +- Calc(select=[xxx], where=[<(xxx, xxx)]) # Need have an arbitrary filter condition.
* +- TableSourceScan(table=[[dim, filter=[]]], fields=[xxx, dim_key])
* }
+ *
+ * We use a {@link FlinkOptimizeProgram} instead of a {@link org.apache.calcite.plan.RelRule} to
+ * realize dynamic partition pruning, because the {@link org.apache.calcite.plan.hep.HepPlanner} in
+ * Flink doesn't support matching a join, replacing one node on one side of the join, and then
+ * rebuilding the join node. This is a limitation of the existing optimizer, whose matching
+ * patterns need to be simpler. Only then can we use a {@link org.apache.calcite.plan.RelRule}
+ * to achieve dpp.
*/
public class FlinkDynamicPartitionPruningProgram
implements FlinkOptimizeProgram<BatchOptimizeContext> {
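To make the constraint in the new javadoc concrete, here is a minimal sketch of the rewrite shape that HepPlanner cannot express: match a join, replace one of its inputs, then rebuild the join node in place. This is not the Flink code (patch 5 below shows the real shuttle); convertFactSide is a hypothetical placeholder for the conversion done by DynamicPartitionPruningUtils.

    // Sketch only: top-down visit that rebuilds a matched join with a converted input.
    // Assumes Calcite's RelNode/Join APIs; convertFactSide is hypothetical.
    private RelNode visit(RelNode rel) {
        if (rel instanceof Join && DynamicPartitionPruningUtils.isSuitableJoin((Join) rel)) {
            Join join = (Join) rel;
            RelNode newFactSide = convertFactSide(join.getRight()); // hypothetical helper
            return join.copy(join.getTraitSet(), Arrays.asList(join.getLeft(), newFactSide));
        }
        // otherwise recurse into the inputs and rebuild this node
        List<RelNode> newInputs = new ArrayList<>();
        for (RelNode input : rel.getInputs()) {
            newInputs.add(visit(input));
        }
        return rel.copy(rel.getTraitSet(), newInputs);
    }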
From 1bda3812ca4cad3d6e650aaa4c79608c6bdd9d0c Mon Sep 17 00:00:00 2001
From: "zhengyunhong.zyh" <337361684@qq.com>
Date: Wed, 4 Jan 2023 11:54:05 +0800
Subject: [PATCH 4/5] dpp will not succeed if agg push down is enabled
---
.../utils/DynamicPartitionPruningUtils.java | 59 +++--
.../DynamicPartitionPruningProgramTest.java | 73 +++++-
.../DynamicPartitionPruningProgramTest.xml | 236 +++++++++++++-----
3 files changed, 272 insertions(+), 96 deletions(-)
diff --git a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java
index ef8506a7c6fdd..3bbeec455d2df 100644
--- a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java
+++ b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java
@@ -21,15 +21,18 @@
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.transformations.SourceTransformation;
+import org.apache.flink.table.api.config.OptimizerConfigOptions;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.ContextResolvedTable;
import org.apache.flink.table.connector.source.DataStreamScanProvider;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceProvider;
+import org.apache.flink.table.connector.source.abilities.SupportsAggregatePushDown;
import org.apache.flink.table.connector.source.abilities.SupportsDynamicFiltering;
import org.apache.flink.table.planner.calcite.FlinkTypeFactory;
import org.apache.flink.table.planner.connectors.TransformationScanProvider;
+import org.apache.flink.table.planner.plan.abilities.source.AggregatePushDownSpec;
import org.apache.flink.table.planner.plan.abilities.source.FilterPushDownSpec;
import org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec;
import org.apache.flink.table.planner.plan.nodes.physical.batch.BatchPhysicalDynamicFilteringDataCollector;
@@ -137,6 +140,20 @@ private static RelNode convertDppFactSide(
|| !(tableSource instanceof ScanTableSource)) {
return rel;
}
+
+ // Dpp cannot succeed if the source supports aggregate push down, source aggregate
+ // push down is enabled, and the aggregation was actually pushed down.
+ if (tableSource instanceof SupportsAggregatePushDown
+ && ShortcutUtils.unwrapContext(rel)
+ .getTableConfig()
+ .get(
+ OptimizerConfigOptions
+ .TABLE_OPTIMIZER_SOURCE_AGGREGATE_PUSHDOWN_ENABLED)
+ && Arrays.stream(tableSourceTable.abilitySpecs())
+ .anyMatch(spec -> spec instanceof AggregatePushDownSpec)) {
+ return rel;
+ }
+
if (!isNewSource((ScanTableSource) tableSource)) {
return rel;
}
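All three conditions above must hold before dpp gives up: the source implements SupportsAggregatePushDown, the optimizer option enabling push down is set, and an AggregatePushDownSpec has actually been applied to the scan. A hedged restatement as one predicate (the helper name is hypothetical; the patch inlines this logic in convertDppFactSide):

    private static boolean aggAlreadyPushedDown(RelNode rel, TableSourceTable table) {
        boolean aggPushDownEnabled =
                ShortcutUtils.unwrapContext(rel)
                        .getTableConfig()
                        .get(OptimizerConfigOptions.TABLE_OPTIMIZER_SOURCE_AGGREGATE_PUSHDOWN_ENABLED);
        return table.tableSource() instanceof SupportsAggregatePushDown
                && aggPushDownEnabled
                && Arrays.stream(table.abilitySpecs())
                        .anyMatch(spec -> spec instanceof AggregatePushDownSpec);
    }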
@@ -256,14 +273,29 @@ private static RelNode convertDppFactSide(
} else if (rel instanceof BatchPhysicalGroupAggregateBase) {
BatchPhysicalGroupAggregateBase agg = (BatchPhysicalGroupAggregateBase) rel;
RelNode input = agg.getInput();
+ int[] grouping = agg.grouping();
+
+ // If any join key in joinKeys refers to an aggregate function field, dpp cannot succeed.
+ for (int k : joinKeys) {
+ if (k >= grouping.length) {
+ return rel;
+ }
+ }
+
RelNode convertedRel =
convertDppFactSide(
input,
- getInputIndices(agg, input, joinKeys),
+ ImmutableIntList.copyOf(
+ joinKeys.stream()
+ .map(joinKey -> agg.grouping()[joinKey])
+ .collect(Collectors.toList())),
dimSide,
dimSideJoinKey,
factSideFactors);
return agg.copy(agg.getTraitSet(), Collections.singletonList(convertedRel));
+ } else {
+ // TODO In the future, we need to support more operators to enrich the matchable
+ // dpp patterns.
}
return rel;
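The two changes above lean on the aggregate's output layout: group fields occupy output positions 0..grouping.length-1 and read input field grouping[k], while aggregate function results follow. A plain-Java illustration, using the shape of testDppWithAggInFactSideAndJoinKeyInGroupFunction below:

    // Aggregate output layout: [group fields..., aggregate function fields...].
    int[] grouping = {0, 1, 2};    // group by fact_part.id, fact_part.name, item.amount
    int joinKeyOnGroupField = 0;   // a join on t1.id maps to input index grouping[0] = 0
    int joinKeyOnAggField = 5;     // sum(fact_date_sk) as fact_date_sk1 sits at output 5
    assert joinKeyOnGroupField < grouping.length;  // pushable: remapped to grouping[k]
    assert joinKeyOnAggField >= grouping.length;   // aggregate result: dpp bails out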
@@ -283,31 +315,6 @@ public static boolean isSuitableJoin(Join join) {
return !joinInfo.leftKeys.isEmpty();
}
- private static ImmutableIntList getInputIndices(
- BatchPhysicalGroupAggregateBase agg, RelNode aggInput, ImmutableIntList joinKeys) {
- int[] indexMap = new int[aggInput.getRowType().getFieldCount()];
- int[] grouping = agg.grouping();
- if (grouping.length == 0) {
- return joinKeys;
- }
- int beginIndex = grouping[0] - 1;
- for (int i = 0; i < indexMap.length; i++) {
- indexMap[i] = i;
- }
-
- System.arraycopy(grouping, 0, indexMap, 0, grouping.length);
- if (beginIndex >= 0) {
- for (int i = 0; i <= beginIndex; i++) {
- indexMap[grouping.length + i] = i;
- }
- }
- List<Integer> indices = new ArrayList<>();
- for (int k : joinKeys) {
- indices.add(indexMap[k]);
- }
- return ImmutableIntList.copyOf(indices);
- }
-
private static BatchPhysicalDynamicFilteringDataCollector createDynamicFilteringConnector(
RelNode dimSide, List<Integer> dynamicFilteringFieldIndices) {
final RelDataType outputType =
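To see why the deleted indexMap heuristic had to go, replay it on a grouping that does not start at column 0. The values here are made up for illustration:

    // The deleted heuristic, replayed with grouping = {4} over a 5-column input
    // (e.g. "group by fact_date_sk" on fact_part):
    int[] grouping = {4};
    int[] indexMap = new int[5];
    for (int i = 0; i < indexMap.length; i++) {
        indexMap[i] = i;
    }
    System.arraycopy(grouping, 0, indexMap, 0, grouping.length);
    int beginIndex = grouping[0] - 1;
    for (int i = 0; i <= beginIndex; i++) {
        indexMap[grouping.length + i] = i;
    }
    // indexMap is now [4, 0, 1, 2, 3]. Output 0 (the group field) maps to input 4,
    // which is correct -- but output 1, an aggregate result such as SUM(price),
    // maps to input column 0, silently pointing the dynamic filter at the wrong
    // field. The replacement maps output k to grouping[k] and returns early when
    // k >= grouping.length, so aggregate-result join keys never reach the scan.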
diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.java
index 01f5c6ef0574f..cef12b156aaf5 100644
--- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.java
+++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.java
@@ -509,7 +509,7 @@ public void testDppWithUnionInFactSide() {
}
@Test
- public void testDppWithAggInFactSide() {
+ public void testDppWithAggInFactSideAndJoinKeyInGrouping() {
// Dpp will succeed.
String ddl =
"CREATE TABLE test_database.item (\n"
@@ -529,6 +529,77 @@ public void testDppWithAggInFactSide() {
util.verifyRelPlan(query);
}
+ @Test
+ public void testDppWithAggInFactSideAndJoinKeyInGroupFunction() {
+ // Dpp will not succeed because the join key is an aggregate function result.
+ String ddl =
+ "CREATE TABLE test_database.item (\n"
+ + " id BIGINT,\n"
+ + " amount BIGINT,\n"
+ + " price BIGINT\n"
+ + ") WITH (\n"
+ + " 'connector' = 'values',\n"
+ + " 'bounded' = 'true'\n"
+ + ")";
+ util.tableEnv().executeSql(ddl);
+
+ String query =
+ "Select * from (Select fact_part.id, item.amount, fact_part.name, sum(fact_part.price), sum(item.price), sum(fact_date_sk) as fact_date_sk1 "
+ + "from fact_part join item on fact_part.id = item.id "
+ + "group by fact_part.id, fact_part.name, item.amount) t1 "
+ + "join dim on t1.fact_date_sk1 = dim.dim_date_sk where dim.price < 500 and dim.price > 300 ";
+ util.verifyRelPlan(query);
+ }
+
+ @Test
+ public void testDppWithAggInFactSideWithAggPushDownEnable() {
+ // Dpp will not succeed when the fact side source supports agg push down and source
+ // agg push down is enabled.
+ String ddl =
+ "CREATE TABLE test_database.item (\n"
+ + " id BIGINT,\n"
+ + " amount BIGINT,\n"
+ + " price BIGINT\n"
+ + ") WITH (\n"
+ + " 'connector' = 'values',\n"
+ + " 'bounded' = 'true'\n"
+ + ")";
+ util.tableEnv().executeSql(ddl);
+
+ String query =
+ "Select * from (Select id, amount, fact_date_sk, count(name), sum(price) "
+ + "from fact_part where fact_date_sk > 100 group by id, amount, fact_date_sk) t1 "
+ + "join dim on t1.fact_date_sk = dim.dim_date_sk where dim.price < 500 and dim.price > 300 ";
+ util.verifyRelPlan(query);
+ }
+
+ @Test
+ public void testDppWithAggInFactSideWithAggPushDownDisable() {
+ // Dpp will succeed when the fact side source supports agg push down but source agg
+ // push down is disabled.
+ TableConfig tableConfig = util.tableEnv().getConfig();
+ // Disable source agg push down.
+ tableConfig.set(
+ OptimizerConfigOptions.TABLE_OPTIMIZER_SOURCE_AGGREGATE_PUSHDOWN_ENABLED, false);
+
+ String ddl =
+ "CREATE TABLE test_database.item (\n"
+ + " id BIGINT,\n"
+ + " amount BIGINT,\n"
+ + " price BIGINT\n"
+ + ") WITH (\n"
+ + " 'connector' = 'values',\n"
+ + " 'bounded' = 'true'\n"
+ + ")";
+ util.tableEnv().executeSql(ddl);
+
+ String query =
+ "Select * from (Select id, amount, fact_date_sk, count(name), sum(price) "
+ + "from fact_part where fact_date_sk > 100 group by id, amount, fact_date_sk) t1 "
+ + "join dim on t1.fact_date_sk = dim.dim_date_sk where dim.price < 500 and dim.price > 300 ";
+ util.verifyRelPlan(query);
+ }
+
@Test
public void testDPPWithJoinReorderTableWithoutStats() {
// Dpp will succeed.
diff --git a/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.xml b/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.xml
index 4c6757a7ca4ee..5ff1bafdb97df 100644
--- a/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.xml
+++ b/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/optimize/program/DynamicPartitionPruningProgramTest.xml
@@ -243,35 +243,43 @@ HashJoin(joinType=[InnerJoin], where=[=(fact_date_sk, dim_date_sk)], select=[id,
]]>
-
+
- 200 and dim.price < 500]]>
+ 300 ]]>
($8, 200), <($3, 500))])
- +- LogicalJoin(condition=[true], joinType=[inner])
- :- LogicalTableScan(table=[[testCatalog, test_database, dim]])
- +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
+LogicalProject(id=[$0], amount=[$1], name=[$2], EXPR$3=[$3], EXPR$4=[$4], fact_date_sk1=[$5], id0=[$6], male=[$7], amount0=[$8], price=[$9], dim_date_sk=[$10])
++- LogicalFilter(condition=[AND(<($9, 500), >($9, 300))])
+ +- LogicalJoin(condition=[=($5, $10)], joinType=[inner])
+ :- LogicalProject(id=[$0], amount=[$2], name=[$1], EXPR$3=[$3], EXPR$4=[$4], fact_date_sk1=[$5])
+ : +- LogicalAggregate(group=[{0, 1, 2}], EXPR$3=[SUM($3)], EXPR$4=[SUM($4)], fact_date_sk1=[SUM($5)])
+ : +- LogicalProject(id=[$0], name=[$1], amount=[$6], price=[$3], price0=[$7], fact_date_sk=[$4])
+ : +- LogicalJoin(condition=[=($0, $5)], joinType=[inner])
+ : :- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
+ : +- LogicalTableScan(table=[[testCatalog, test_database, item]])
+ +- LogicalTableScan(table=[[testCatalog, test_database, dim]])
]]>
(price, 200)])
- +- DynamicFilteringTableSourceScan(table=[[testCatalog, test_database, fact_part, filter=[]]], fields=[id, name, amount, price, fact_date_sk])
- +- DynamicFilteringDataCollector(fields=[dim_date_sk])
- +- Calc(select=[id, male, amount, price, dim_date_sk], where=[<(price, 500)])
- +- TableSourceScan(table=[[testCatalog, test_database, dim, filter=[]]], fields=[id, male, amount, price, dim_date_sk])
+HashJoin(joinType=[InnerJoin], where=[=(fact_date_sk1, dim_date_sk)], select=[id, amount, name, EXPR$3, EXPR$4, fact_date_sk1, id0, male, amount0, price, dim_date_sk], build=[left])
+:- Exchange(distribution=[hash[fact_date_sk1]])
+: +- Calc(select=[id, amount, name, EXPR$3, EXPR$4, fact_date_sk1])
+: +- HashAggregate(isMerge=[false], groupBy=[id, name, amount], select=[id, name, amount, SUM(price) AS EXPR$3, SUM(price0) AS EXPR$4, SUM(fact_date_sk) AS fact_date_sk1])
+: +- Calc(select=[id, name, amount, price, price0, fact_date_sk])
+: +- HashJoin(joinType=[InnerJoin], where=[=(id, id0)], select=[id, name, price, fact_date_sk, id0, amount, price0], build=[right])
+: :- Exchange(distribution=[hash[id]])
+: : +- TableSourceScan(table=[[testCatalog, test_database, fact_part, project=[id, name, price, fact_date_sk], metadata=[]]], fields=[id, name, price, fact_date_sk])
+: +- Exchange(distribution=[hash[id]])
+: +- TableSourceScan(table=[[testCatalog, test_database, item]], fields=[id, amount, price])
++- Exchange(distribution=[hash[dim_date_sk]])
+ +- Calc(select=[id, male, amount, price, dim_date_sk], where=[SEARCH(price, Sarg[(300..500)])])
+ +- TableSourceScan(table=[[testCatalog, test_database, dim, filter=[]]], fields=[id, male, amount, price, dim_date_sk])
]]>
-
+
300 ]]>
@@ -304,6 +312,68 @@ HashJoin(joinType=[InnerJoin], where=[=(fact_date_sk, dim_date_sk)], select=[fac
: : +- TableSourceScan(table=[[testCatalog, test_database, dim, filter=[]]], fields=[id, male, amount, price, dim_date_sk])
: +- Exchange(distribution=[hash[id]])
: +- TableSourceScan(table=[[testCatalog, test_database, item, project=[id, amount], metadata=[]]], fields=[id, amount])
++- Exchange(distribution=[hash[dim_date_sk]])
+ +- Calc(select=[id, male, amount, price, dim_date_sk], where=[SEARCH(price, Sarg[(300..500)])])
+ +- TableSourceScan(table=[[testCatalog, test_database, dim, filter=[]]], fields=[id, male, amount, price, dim_date_sk])
+]]>
+
+
+
+
+ 100 group by id, amount, fact_date_sk) t1 join dim on t1.fact_date_sk = dim.dim_date_sk where dim.price < 500 and dim.price > 300 ]]>
+
+
+ ($8, 300))])
+ +- LogicalJoin(condition=[=($2, $9)], joinType=[inner])
+ :- LogicalAggregate(group=[{0, 1, 2}], EXPR$3=[COUNT($3)], EXPR$4=[SUM($4)])
+ : +- LogicalProject(id=[$0], amount=[$2], fact_date_sk=[$4], name=[$1], price=[$3])
+ : +- LogicalFilter(condition=[>($4, 100)])
+ : +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
+ +- LogicalTableScan(table=[[testCatalog, test_database, dim]])
+]]>
+
+
+
+
+
+
+
+ 100 group by id, amount, fact_date_sk) t1 join dim on t1.fact_date_sk = dim.dim_date_sk where dim.price < 500 and dim.price > 300 ]]>
+
+
+ ($8, 300))])
+ +- LogicalJoin(condition=[=($2, $9)], joinType=[inner])
+ :- LogicalAggregate(group=[{0, 1, 2}], EXPR$3=[COUNT($3)], EXPR$4=[SUM($4)])
+ : +- LogicalProject(id=[$0], amount=[$2], fact_date_sk=[$4], name=[$1], price=[$3])
+ : +- LogicalFilter(condition=[>($4, 100)])
+ : +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
+ +- LogicalTableScan(table=[[testCatalog, test_database, dim]])
+]]>
+
+
+
+
+
+
+
+ 300]]>
+
+
+ ($9, 300))])
+ +- LogicalJoin(condition=[true], joinType=[inner])
+ :- LogicalJoin(condition=[true], joinType=[inner])
+ : :- LogicalUnion(all=[true])
+ : : :- LogicalProject(id=[$0], fact_date_sk=[$4], amount1=[+($2, 1)])
+ : : : +- LogicalFilter(condition=[=($3, 1)])
+ : : : +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
+ : : +- LogicalProject(id=[$0], fact_date_sk=[$4], EXPR$2=[+($2, 1)])
+ : : +- LogicalFilter(condition=[=($3, 2)])
+ : : +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
+ : +- LogicalTableScan(table=[[testCatalog, test_database, item]])
+ +- LogicalTableScan(table=[[testCatalog, test_database, dim]])
+]]>
+
+
+
@@ -729,17 +845,17 @@ NestedLoopJoin(joinType=[InnerJoin], where=[=(fact_date_sk, dim_date_sk)], selec
]]>
-
+
-
+ 200 and dim.price < 500]]>
($8, 200), <($3, 500))])
+- LogicalJoin(condition=[true], joinType=[inner])
:- LogicalTableScan(table=[[testCatalog, test_database, dim]])
- +- LogicalTableScan(table=[[testCatalog, test_database, legacy_source]])
+ +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
]]>
@@ -749,53 +865,11 @@ HashJoin(joinType=[InnerJoin], where=[=(fact_date_sk, dim_date_sk)], select=[id,
: +- Calc(select=[id, male, amount, price, dim_date_sk], where=[<(price, 500)])
: +- TableSourceScan(table=[[testCatalog, test_database, dim, filter=[]]], fields=[id, male, amount, price, dim_date_sk])
+- Exchange(distribution=[hash[fact_date_sk]])
- +- TableSourceScan(table=[[testCatalog, test_database, legacy_source]], fields=[id, name, amount, price, fact_date_sk])
-]]>
-
-
-
-
- 300]]>
-
-
- ($9, 300))])
- +- LogicalJoin(condition=[true], joinType=[inner])
- :- LogicalJoin(condition=[true], joinType=[inner])
- : :- LogicalUnion(all=[true])
- : : :- LogicalProject(id=[$0], fact_date_sk=[$4], amount1=[+($2, 1)])
- : : : +- LogicalFilter(condition=[=($3, 1)])
- : : : +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
- : : +- LogicalProject(id=[$0], fact_date_sk=[$4], EXPR$2=[+($2, 1)])
- : : +- LogicalFilter(condition=[=($3, 2)])
- : : +- LogicalTableScan(table=[[testCatalog, test_database, fact_part]])
- : +- LogicalTableScan(table=[[testCatalog, test_database, item]])
- +- LogicalTableScan(table=[[testCatalog, test_database, dim]])
-]]>
-
-
- (price, 200)])
+ +- DynamicFilteringTableSourceScan(table=[[testCatalog, test_database, fact_part, filter=[]]], fields=[id, name, amount, price, fact_date_sk])
+ +- DynamicFilteringDataCollector(fields=[dim_date_sk])
+ +- Calc(select=[id, male, amount, price, dim_date_sk], where=[<(price, 500)])
+ +- TableSourceScan(table=[[testCatalog, test_database, dim, filter=[]]], fields=[id, male, amount, price, dim_date_sk])
]]>
@@ -851,6 +925,30 @@ NestedLoopJoin(joinType=[InnerJoin], where=[AND(=(fact_date_sk, dim_date_sk), =(
+- DynamicFilteringDataCollector(fields=[dim_date_sk])
+- Calc(select=[id, male, amount, price, dim_date_sk], where=[<(price, 500)])
+- TableSourceScan(table=[[testCatalog, test_database, dim, filter=[]]], fields=[id, male, amount, price, dim_date_sk])
+]]>
+
+
+
+
+
+
+
+
+
+
+
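Before the refactor in the next patch, it is worth pinning down how the accepted filter fields on the fact side select the dim-side indices handed to the DynamicFilteringDataCollector. A plain-Java walk-through of the loop in convertDppFactSide (the loop is shown verbatim in patch 5; the concrete index values here are made up for illustration):

    import java.util.ArrayList;
    import java.util.List;

    List<Integer> joinKeys = List.of(4, 0);           // fact-side join key columns
    List<Integer> dimSideJoinKey = List.of(12, 5);    // dim-side columns, aligned by position
    List<Integer> acceptedFieldIndices = List.of(4);  // fields the source accepts for filtering
    List<Integer> dynamicFilteringFieldIndices = new ArrayList<>();
    for (int i = 0; i < joinKeys.size(); ++i) {
        if (acceptedFieldIndices.contains(joinKeys.get(i))) {
            dynamicFilteringFieldIndices.add(dimSideJoinKey.get(i));
        }
    }
    // dynamicFilteringFieldIndices == [12]: only dim columns whose fact-side
    // counterpart is accepted by the source are collected at runtime.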
From 96569781df60b2f8175d2c0a76d726d46a9a3bd9 Mon Sep 17 00:00:00 2001
From: "zhengyunhong.zyh" <337361684@qq.com>
Date: Fri, 6 Jan 2023 19:16:44 +0800
Subject: [PATCH 5/5] Reformat the dynamic partition pruning utils
---
.../FlinkDynamicPartitionPruningProgram.java | 27 +-
.../utils/DynamicPartitionPruningUtils.java | 794 +++++++++---------
2 files changed, 414 insertions(+), 407 deletions(-)
diff --git a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java
index 25311764442ef..22aebd7fcab20 100644
--- a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java
+++ b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/optimize/program/FlinkDynamicPartitionPruningProgram.java
@@ -65,29 +65,26 @@
* +- TableSourceScan(table=[[dim, filter=[]]], fields=[xxx, dim_key])
* }
*
- * We use a {@link FlinkOptimizeProgram} instead of a {@link org.apache.calcite.plan.RelRule} to
- * realize dynamic partition pruning, because the {@link org.apache.calcite.plan.hep.HepPlanner} in
- * Flink doesn't support matching a join, replacing one node on one side of the join, and then
- * rebuilding the join node. This is a limitation of the existing optimizer, whose matching
- * patterns need to be simpler. Only then can we use a {@link org.apache.calcite.plan.RelRule}
- * to achieve dpp.
+ *
+ * <p>Note: We use a {@link FlinkOptimizeProgram} instead of a {@link
+ * org.apache.calcite.plan.RelRule} here because the {@link org.apache.calcite.plan.hep.HepPlanner}
+ * doesn't support matching a partially determined pattern or dynamically replacing the inputs of
+ * matched nodes. Once we improve the {@link org.apache.calcite.plan.hep.HepPlanner}, this class can
+ * be converted to a {@link org.apache.calcite.plan.RelRule}.
*/
public class FlinkDynamicPartitionPruningProgram
implements FlinkOptimizeProgram<BatchOptimizeContext> {
@Override
public RelNode optimize(RelNode root, BatchOptimizeContext context) {
+ if (!ShortcutUtils.unwrapContext(root)
+ .getTableConfig()
+ .get(OptimizerConfigOptions.TABLE_OPTIMIZER_DYNAMIC_FILTERING_ENABLED)) {
+ return root;
+ }
DefaultRelShuttle shuttle =
new DefaultRelShuttle() {
@Override
public RelNode visit(RelNode rel) {
- if (!ShortcutUtils.unwrapContext(rel)
- .getTableConfig()
- .get(
- OptimizerConfigOptions
- .TABLE_OPTIMIZER_DYNAMIC_FILTERING_ENABLED)) {
- return rel;
- }
if (!(rel instanceof Join)
|| !DynamicPartitionPruningUtils.isSuitableJoin((Join) rel)) {
List<RelNode> newInputs = new ArrayList<>();
@@ -103,7 +100,7 @@ public RelNode visit(RelNode rel) {
RelNode rightSide = join.getRight();
Join newJoin = join;
boolean changed = false;
- if (DynamicPartitionPruningUtils.isDimSide(leftSide)) {
+ if (DynamicPartitionPruningUtils.isDppDimSide(leftSide)) {
if (join.getJoinType() != JoinRelType.RIGHT) {
Tuple2<Boolean, RelNode> relTuple =
DynamicPartitionPruningUtils
@@ -118,7 +115,7 @@ public RelNode visit(RelNode rel) {
join.getTraitSet(),
Arrays.asList(leftSide, relTuple.f1.accept(this)));
}
- } else if (DynamicPartitionPruningUtils.isDimSide(rightSide)) {
+ } else if (DynamicPartitionPruningUtils.isDppDimSide(rightSide)) {
if (join.getJoinType() != JoinRelType.LEFT) {
Tuple2<Boolean, RelNode> relTuple =
DynamicPartitionPruningUtils
diff --git a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java
index 3bbeec455d2df..90f7b40bc0b42 100644
--- a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java
+++ b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/utils/DynamicPartitionPruningUtils.java
@@ -21,14 +21,12 @@
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.transformations.SourceTransformation;
-import org.apache.flink.table.api.config.OptimizerConfigOptions;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.ContextResolvedTable;
import org.apache.flink.table.connector.source.DataStreamScanProvider;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceProvider;
-import org.apache.flink.table.connector.source.abilities.SupportsAggregatePushDown;
import org.apache.flink.table.connector.source.abilities.SupportsDynamicFiltering;
import org.apache.flink.table.planner.calcite.FlinkTypeFactory;
import org.apache.flink.table.planner.connectors.TransformationScanProvider;
@@ -71,458 +69,470 @@
public class DynamicPartitionPruningUtils {
/**
- * Judge whether input RelNode meets the conditions of dimSide. If joinKeys is null means we
+ * Judge whether the input RelNode meets the conditions of dimSide. If joinKeys is null, we
* need not consider the join keys on the dim side, because they are already handled by the
* dynamic partition pruning rule. If joinKeys is not null, we need to judge whether the join
* keys are changed on the dim side; if changed, this RelNode is not the dim side.
*/
- public static boolean isDimSide(RelNode rel) {
- DppDimSideFactors dimSideFactors = new DppDimSideFactors();
- visitDimSide(rel, dimSideFactors);
- return dimSideFactors.isDimSide();
+ public static boolean isDppDimSide(RelNode rel) {
+ DppDimSideChecker dimSideChecker = new DppDimSideChecker(rel);
+ return dimSideChecker.isDppDimSide();
}
+ /**
+ * Judge whether the input RelNode can be converted to the dpp fact side. If the input RelNode
+ * can be converted, this method will return the converted fact side, whose partitioned table
+ * source is converted to a {@link BatchPhysicalDynamicFilteringTableSourceScan}. If not,
+ * this method will return the original RelNode.
+ */
public static Tuple2<Boolean, RelNode> canConvertAndConvertDppFactSide(
RelNode rel,
ImmutableIntList joinKeys,
RelNode dimSide,
ImmutableIntList dimSideJoinKey) {
- DppFactSideFactors factSideFactors = new DppFactSideFactors();
- RelNode newRel =
- convertDppFactSide(rel, joinKeys, dimSide, dimSideJoinKey, factSideFactors);
- return Tuple2.of(factSideFactors.isChanged, newRel);
+ DppFactSideChecker dppFactSideChecker =
+ new DppFactSideChecker(rel, joinKeys, dimSide, dimSideJoinKey);
+ return dppFactSideChecker.canConvertAndConvertDppFactSide();
}
- private static List<String> getSuitableDynamicFilteringFieldsInFactSide(
- DynamicTableSource tableSource, List<String> candidateFields) {
- List<String> acceptedFilterFields =
- ((SupportsDynamicFiltering) tableSource).listAcceptedFilterFields();
- if (acceptedFilterFields == null || acceptedFilterFields.isEmpty()) {
- return new ArrayList<>();
+ /** Judge whether the join node is a suitable one for the dpp pattern. */
+ public static boolean isSuitableJoin(Join join) {
+ // Now dynamic partition pruning supports left/right join, inner join and semi
+ // join, but semi join cannot participate in join reorder yet.
+ if (join.getJoinType() != JoinRelType.INNER
+ && join.getJoinType() != JoinRelType.SEMI
+ && join.getJoinType() != JoinRelType.LEFT
+ && join.getJoinType() != JoinRelType.RIGHT) {
+ return false;
}
- List<String> suitableFields = new ArrayList<>();
- // If candidateField not in acceptedFilterFields means dpp rule will not be matched,
- // because we can not prune any partitions according to non-accepted filter fields
- // provided by partition table source.
- for (String candidateField : candidateFields) {
- if (acceptedFilterFields.contains(candidateField)) {
- suitableFields.add(candidateField);
- }
+ JoinInfo joinInfo = join.analyzeCondition();
+ return !joinInfo.leftKeys.isEmpty();
+ }
+
+ /** This class is used to check whether the relNode is the dpp dim side. */
+ private static class DppDimSideChecker {
+ private final RelNode relNode;
+ private boolean hasFilter;
+ private boolean hasPartitionedScan;
+ private final List<ContextResolvedTable> tables = new ArrayList<>();
+
+ public DppDimSideChecker(RelNode relNode) {
+ this.relNode = relNode;
}
- return suitableFields;
- }
+ public boolean isDppDimSide() {
+ visitDimSide(this.relNode);
+ return hasFilter && !hasPartitionedScan && tables.size() == 1;
+ }
- private static RelNode convertDppFactSide(
- RelNode rel,
- ImmutableIntList joinKeys,
- RelNode dimSide,
- ImmutableIntList dimSideJoinKey,
- DppFactSideFactors factSideFactors) {
- if (rel instanceof TableScan) {
- TableScan scan = (TableScan) rel;
- if (scan instanceof BatchPhysicalDynamicFilteringTableSourceScan) {
- // rule applied
- return rel;
- }
- TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
- if (tableSourceTable == null) {
- return rel;
- }
- CatalogTable catalogTable = tableSourceTable.contextResolvedTable().getResolvedTable();
- List<String> partitionKeys = catalogTable.getPartitionKeys();
- if (partitionKeys.isEmpty()) {
- return rel;
- }
- DynamicTableSource tableSource = tableSourceTable.tableSource();
- if (!(tableSource instanceof SupportsDynamicFiltering)
- || !(tableSource instanceof ScanTableSource)) {
- return rel;
- }
+ /**
+ * Visit the dim side to judge whether it has a filter condition and whether its
+ * source table scan is a non-partitioned scan.
+ */
+ private void visitDimSide(RelNode rel) {
+ // TODO Make visitDimSide more efficient and more accurate, e.g. a filter on the dim
+ // table or a filter on the partition field of the fact table.
+ if (rel instanceof TableScan) {
+ TableScan scan = (TableScan) rel;
+ TableSourceTable table = scan.getTable().unwrap(TableSourceTable.class);
+ if (table == null) {
+ return;
+ }
+ if (!hasFilter
+ && table.abilitySpecs() != null
+ && table.abilitySpecs().length != 0) {
+ for (SourceAbilitySpec spec : table.abilitySpecs()) {
+ if (spec instanceof FilterPushDownSpec) {
+ List<RexNode> predicates = ((FilterPushDownSpec) spec).getPredicates();
+ for (RexNode predicate : predicates) {
+ if (isSuitableFilter(predicate)) {
+ hasFilter = true;
+ }
+ }
+ }
+ }
+ }
+ CatalogTable catalogTable = table.contextResolvedTable().getResolvedTable();
+ if (catalogTable.isPartitioned()) {
+ hasPartitionedScan = true;
+ return;
+ }
- // Dpp cannot succeed if the source supports aggregate push down, source aggregate
- // push down is enabled, and the aggregation was actually pushed down.
- if (tableSource instanceof SupportsAggregatePushDown
- && ShortcutUtils.unwrapContext(rel)
- .getTableConfig()
- .get(
- OptimizerConfigOptions
- .TABLE_OPTIMIZER_SOURCE_AGGREGATE_PUSHDOWN_ENABLED)
- && Arrays.stream(tableSourceTable.abilitySpecs())
- .anyMatch(spec -> spec instanceof AggregatePushDownSpec)) {
- return rel;
+ // To ensure there is only one source on the dim side.
+ setTables(table.contextResolvedTable());
+ } else if (rel instanceof HepRelVertex) {
+ visitDimSide(((HepRelVertex) rel).getCurrentRel());
+ } else if (rel instanceof Exchange || rel instanceof Project) {
+ visitDimSide(rel.getInput(0));
+ } else if (rel instanceof Calc) {
+ RexProgram origProgram = ((Calc) rel).getProgram();
+ if (origProgram.getCondition() != null
+ && isSuitableFilter(
+ origProgram.expandLocalRef(origProgram.getCondition()))) {
+ hasFilter = true;
+ }
+ visitDimSide(rel.getInput(0));
+ } else if (rel instanceof Filter) {
+ if (isSuitableFilter(((Filter) rel).getCondition())) {
+ hasFilter = true;
+ }
+ visitDimSide(rel.getInput(0));
+ } else if (rel instanceof Join) {
+ Join join = (Join) rel;
+ visitDimSide(join.getLeft());
+ visitDimSide(join.getRight());
+ } else if (rel instanceof BatchPhysicalGroupAggregateBase) {
+ visitDimSide(((BatchPhysicalGroupAggregateBase) rel).getInput());
+ } else if (rel instanceof Union) {
+ Union union = (Union) rel;
+ for (RelNode input : union.getInputs()) {
+ visitDimSide(input);
+ }
}
+ }
- if (!isNewSource((ScanTableSource) tableSource)) {
- return rel;
+ /**
+ * Not every filter condition is suitable for filtering partitions via the dynamic partition
+ * pruning rules. For example, NOT NULL can only filter the single default partition, which
+ * has a small impact on filtering data.
+ */
+ private static boolean isSuitableFilter(RexNode filterCondition) {
+ switch (filterCondition.getKind()) {
+ case AND:
+ List<RexNode> conjunctions = RelOptUtil.conjunctions(filterCondition);
+ return isSuitableFilter(conjunctions.get(0))
+ || isSuitableFilter(conjunctions.get(1));
+ case OR:
+ List<RexNode> disjunctions = RelOptUtil.disjunctions(filterCondition);
+ return isSuitableFilter(disjunctions.get(0))
+ && isSuitableFilter(disjunctions.get(1));
+ case NOT:
+ return isSuitableFilter(((RexCall) filterCondition).operands.get(0));
+ case EQUALS:
+ case GREATER_THAN:
+ case GREATER_THAN_OR_EQUAL:
+ case LESS_THAN:
+ case LESS_THAN_OR_EQUAL:
+ case NOT_EQUALS:
+ case IN:
+ case LIKE:
+ case CONTAINS:
+ case SEARCH:
+ case IS_FALSE:
+ case IS_NOT_FALSE:
+ case IS_NOT_TRUE:
+ case IS_TRUE:
+ // TODO add more suitable filters which can filter enough partitions when used
+ // in dynamic partition pruning.
+ return true;
+ default:
+ return false;
}
+ }
- List<String> candidateFields =
- joinKeys.stream()
- .map(i -> scan.getRowType().getFieldNames().get(i))
- .collect(Collectors.toList());
- if (candidateFields.isEmpty()) {
- return rel;
+ private void setTables(ContextResolvedTable catalogTable) {
+ if (tables.size() == 0) {
+ tables.add(catalogTable);
+ } else {
+ for (ContextResolvedTable thisTable : new ArrayList<>(tables)) {
+ if (!thisTable.getIdentifier().equals(catalogTable.getIdentifier())) {
+ tables.add(catalogTable);
+ }
+ }
}
+ }
+ }
- List<String> acceptedFilterFields =
- getSuitableDynamicFilteringFieldsInFactSide(tableSource, candidateFields);
+ /** This class is used to check whether the relNode can be used as the fact side in dpp. */
+ private static class DppFactSideChecker {
+ private final RelNode relNode;
+ private final ImmutableIntList joinKeys;
+ private final RelNode dimSide;
+ private final ImmutableIntList dimSideJoinKey;
- if (acceptedFilterFields.size() == 0) {
- return rel;
- }
+ // If join key is not changed in fact side, this value is always true.
+ private boolean isChanged;
- // Apply suitable accepted filter fields to source.
- ((SupportsDynamicFiltering) tableSource).applyDynamicFiltering(acceptedFilterFields);
-
- List<Integer> acceptedFieldIndices =
- acceptedFilterFields.stream()
- .map(f -> scan.getRowType().getFieldNames().indexOf(f))
- .collect(Collectors.toList());
- List<Integer> dynamicFilteringFieldIndices = new ArrayList<>();
- for (int i = 0; i < joinKeys.size(); ++i) {
- if (acceptedFieldIndices.contains(joinKeys.get(i))) {
- dynamicFilteringFieldIndices.add(dimSideJoinKey.get(i));
- }
- }
+ public DppFactSideChecker(
+ RelNode relNode,
+ ImmutableIntList joinKeys,
+ RelNode dimSide,
+ ImmutableIntList dimSideJoinKey) {
+ this.relNode = relNode;
+ this.joinKeys = joinKeys;
+ this.dimSide = dimSide;
+ this.dimSideJoinKey = dimSideJoinKey;
+ }
- BatchPhysicalDynamicFilteringDataCollector dynamicFilteringDataCollector =
- createDynamicFilteringConnector(dimSide, dynamicFilteringFieldIndices);
-
- factSideFactors.isChanged = true;
- return new BatchPhysicalDynamicFilteringTableSourceScan(
- scan.getCluster(),
- scan.getTraitSet(),
- scan.getHints(),
- tableSourceTable,
- dynamicFilteringDataCollector);
- } else if (rel instanceof Exchange || rel instanceof Filter) {
- return rel.copy(
- rel.getTraitSet(),
- Collections.singletonList(
- convertDppFactSide(
- rel.getInput(0),
- joinKeys,
- dimSide,
- dimSideJoinKey,
- factSideFactors)));
- } else if (rel instanceof Project) {
- List<RexNode> projects = ((Project) rel).getProjects();
- ImmutableIntList inputJoinKeys = getInputIndices(projects, joinKeys);
- if (inputJoinKeys.isEmpty()) {
- return rel;
- }
+ public Tuple2<Boolean, RelNode> canConvertAndConvertDppFactSide() {
+ return Tuple2.of(
+ isChanged, convertDppFactSide(relNode, joinKeys, dimSide, dimSideJoinKey));
+ }
- return rel.copy(
- rel.getTraitSet(),
- Collections.singletonList(
- convertDppFactSide(
- rel.getInput(0),
- inputJoinKeys,
- dimSide,
- dimSideJoinKey,
- factSideFactors)));
- } else if (rel instanceof Calc) {
- Calc calc = (Calc) rel;
- RexProgram program = calc.getProgram();
- List<RexNode> projects =
- program.getProjectList().stream()
- .map(program::expandLocalRef)
- .collect(Collectors.toList());
- ImmutableIntList inputJoinKeys = getInputIndices(projects, joinKeys);
- if (inputJoinKeys.isEmpty()) {
- return rel;
- }
+ private RelNode convertDppFactSide(
+ RelNode rel,
+ ImmutableIntList joinKeys,
+ RelNode dimSide,
+ ImmutableIntList dimSideJoinKey) {
+ if (rel instanceof TableScan) {
+ TableScan scan = (TableScan) rel;
+ if (scan instanceof BatchPhysicalDynamicFilteringTableSourceScan) {
+ // rule applied
+ return rel;
+ }
+ TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
+ if (tableSourceTable == null) {
+ return rel;
+ }
+ CatalogTable catalogTable =
+ tableSourceTable.contextResolvedTable().getResolvedTable();
+ List<String> partitionKeys = catalogTable.getPartitionKeys();
+ if (partitionKeys.isEmpty()) {
+ return rel;
+ }
+ DynamicTableSource tableSource = tableSourceTable.tableSource();
+ if (!(tableSource instanceof SupportsDynamicFiltering)
+ || !(tableSource instanceof ScanTableSource)) {
+ return rel;
+ }
- return rel.copy(
- rel.getTraitSet(),
- Collections.singletonList(
- convertDppFactSide(
- rel.getInput(0),
- inputJoinKeys,
- dimSide,
- dimSideJoinKey,
- factSideFactors)));
- } else if (rel instanceof Join) {
- Join currentJoin = (Join) rel;
- return currentJoin.copy(
- currentJoin.getTraitSet(),
- Arrays.asList(
- convertDppFactSide(
- currentJoin.getLeft(),
- getInputIndices(currentJoin, joinKeys, true),
- dimSide,
- dimSideJoinKey,
- factSideFactors),
- convertDppFactSide(
- currentJoin.getRight(),
- getInputIndices(currentJoin, joinKeys, false),
- dimSide,
- dimSideJoinKey,
- factSideFactors)));
- } else if (rel instanceof Union) {
- Union union = (Union) rel;
- List<RelNode> newInputs = new ArrayList<>();
- for (RelNode input : union.getInputs()) {
- newInputs.add(
- convertDppFactSide(
- input, joinKeys, dimSide, dimSideJoinKey, factSideFactors));
- }
- return union.copy(union.getTraitSet(), newInputs, union.all);
- } else if (rel instanceof BatchPhysicalGroupAggregateBase) {
- BatchPhysicalGroupAggregateBase agg = (BatchPhysicalGroupAggregateBase) rel;
- RelNode input = agg.getInput();
- int[] grouping = agg.grouping();
+ // Dpp cannot succeed if the source already has an aggregate push down spec.
+ if (Arrays.stream(tableSourceTable.abilitySpecs())
+ .anyMatch(spec -> spec instanceof AggregatePushDownSpec)) {
+ return rel;
+ }
- // If any join key in joinKeys refers to an aggregate function field, dpp cannot succeed.
- for (int k : joinKeys) {
- if (k >= grouping.length) {
+ if (!isNewSource((ScanTableSource) tableSource)) {
return rel;
}
- }
- RelNode convertedRel =
- convertDppFactSide(
- input,
- ImmutableIntList.copyOf(
- joinKeys.stream()
- .map(joinKey -> agg.grouping()[joinKey])
- .collect(Collectors.toList())),
- dimSide,
- dimSideJoinKey,
- factSideFactors);
- return agg.copy(agg.getTraitSet(), Collections.singletonList(convertedRel));
- } else {
- // TODO In the future, we need to support more operators to enrich the matchable
- // dpp patterns.
- }
+ List<String> candidateFields =
+ joinKeys.stream()
+ .map(i -> scan.getRowType().getFieldNames().get(i))
+ .collect(Collectors.toList());
+ if (candidateFields.isEmpty()) {
+ return rel;
+ }
- return rel;
- }
+ List<String> acceptedFilterFields =
+ getSuitableDynamicFilteringFieldsInFactSide(tableSource, candidateFields);
- public static boolean isSuitableJoin(Join join) {
- // Now dynamic partition pruning supports left/right join, inner and semi
- // join. but now semi join can not join reorder.
- if (join.getJoinType() != JoinRelType.INNER
- && join.getJoinType() != JoinRelType.SEMI
- && join.getJoinType() != JoinRelType.LEFT
- && join.getJoinType() != JoinRelType.RIGHT) {
- return false;
- }
+ if (acceptedFilterFields.size() == 0) {
+ return rel;
+ }
- JoinInfo joinInfo = join.analyzeCondition();
- return !joinInfo.leftKeys.isEmpty();
- }
+ // Apply suitable accepted filter fields to source.
+ ((SupportsDynamicFiltering) tableSource)
+ .applyDynamicFiltering(acceptedFilterFields);
+
+ List<Integer> acceptedFieldIndices =
+ acceptedFilterFields.stream()
+ .map(f -> scan.getRowType().getFieldNames().indexOf(f))
+ .collect(Collectors.toList());
+ List<Integer> dynamicFilteringFieldIndices = new ArrayList<>();
+ for (int i = 0; i < joinKeys.size(); ++i) {
+ if (acceptedFieldIndices.contains(joinKeys.get(i))) {
+ dynamicFilteringFieldIndices.add(dimSideJoinKey.get(i));
+ }
+ }
- private static BatchPhysicalDynamicFilteringDataCollector createDynamicFilteringConnector(
- RelNode dimSide, List<Integer> dynamicFilteringFieldIndices) {
- final RelDataType outputType =
- ((FlinkTypeFactory) dimSide.getCluster().getTypeFactory())
- .projectStructType(
- dimSide.getRowType(),
- dynamicFilteringFieldIndices.stream().mapToInt(i -> i).toArray());
- return new BatchPhysicalDynamicFilteringDataCollector(
- dimSide.getCluster(),
- dimSide.getTraitSet(),
- ignoreExchange(dimSide),
- outputType,
- dynamicFilteringFieldIndices.stream().mapToInt(i -> i).toArray());
- }
+ BatchPhysicalDynamicFilteringDataCollector dynamicFilteringDataCollector =
+ createDynamicFilteringConnector(dimSide, dynamicFilteringFieldIndices);
+
+ isChanged = true;
+ return new BatchPhysicalDynamicFilteringTableSourceScan(
+ scan.getCluster(),
+ scan.getTraitSet(),
+ scan.getHints(),
+ tableSourceTable,
+ dynamicFilteringDataCollector);
+ } else if (rel instanceof Exchange || rel instanceof Filter) {
+ return rel.copy(
+ rel.getTraitSet(),
+ Collections.singletonList(
+ convertDppFactSide(
+ rel.getInput(0), joinKeys, dimSide, dimSideJoinKey)));
+ } else if (rel instanceof Project) {
+ List<RexNode> projects = ((Project) rel).getProjects();
+ ImmutableIntList inputJoinKeys = getInputIndices(projects, joinKeys);
+ if (inputJoinKeys.isEmpty()) {
+ return rel;
+ }
- private static RelNode ignoreExchange(RelNode dimSide) {
- if (dimSide instanceof Exchange) {
- return dimSide.getInput(0);
- } else {
- return dimSide;
- }
- }
+ return rel.copy(
+ rel.getTraitSet(),
+ Collections.singletonList(
+ convertDppFactSide(
+ rel.getInput(0), inputJoinKeys, dimSide, dimSideJoinKey)));
+ } else if (rel instanceof Calc) {
+ Calc calc = (Calc) rel;
+ RexProgram program = calc.getProgram();
+ List<RexNode> projects =
+ program.getProjectList().stream()
+ .map(program::expandLocalRef)
+ .collect(Collectors.toList());
+ ImmutableIntList inputJoinKeys = getInputIndices(projects, joinKeys);
+ if (inputJoinKeys.isEmpty()) {
+ return rel;
+ }
- /**
- * Visit dim side to judge whether dim side has filter condition and whether dim side's source
- * table scan is non partitioned scan.
- */
- private static void visitDimSide(RelNode rel, DppDimSideFactors dimSideFactors) {
- // TODO Let visitDimSide more efficient and more accurate. Like a filter on dim table or a
- // filter for the partition field on fact table.
- if (rel instanceof TableScan) {
- TableScan scan = (TableScan) rel;
- TableSourceTable table = scan.getTable().unwrap(TableSourceTable.class);
- if (table == null) {
- return;
- }
- if (!dimSideFactors.hasFilter
- && table.abilitySpecs() != null
- && table.abilitySpecs().length != 0) {
- for (SourceAbilitySpec spec : table.abilitySpecs()) {
- if (spec instanceof FilterPushDownSpec) {
- List<RexNode> predicates = ((FilterPushDownSpec) spec).getPredicates();
- for (RexNode predicate : predicates) {
- if (isSuitableFilter(predicate)) {
- dimSideFactors.hasFilter = true;
- }
- }
+ return rel.copy(
+ rel.getTraitSet(),
+ Collections.singletonList(
+ convertDppFactSide(
+ rel.getInput(0), inputJoinKeys, dimSide, dimSideJoinKey)));
+ } else if (rel instanceof Join) {
+ Join currentJoin = (Join) rel;
+ return currentJoin.copy(
+ currentJoin.getTraitSet(),
+ Arrays.asList(
+ convertDppFactSide(
+ currentJoin.getLeft(),
+ getInputIndices(currentJoin, joinKeys, true),
+ dimSide,
+ dimSideJoinKey),
+ convertDppFactSide(
+ currentJoin.getRight(),
+ getInputIndices(currentJoin, joinKeys, false),
+ dimSide,
+ dimSideJoinKey)));
+ } else if (rel instanceof Union) {
+ Union union = (Union) rel;
+ List<RelNode> newInputs = new ArrayList<>();
+ for (RelNode input : union.getInputs()) {
+ newInputs.add(convertDppFactSide(input, joinKeys, dimSide, dimSideJoinKey));
+ }
+ return union.copy(union.getTraitSet(), newInputs, union.all);
+ } else if (rel instanceof BatchPhysicalGroupAggregateBase) {
+ BatchPhysicalGroupAggregateBase agg = (BatchPhysicalGroupAggregateBase) rel;
+ RelNode input = agg.getInput();
+ int[] grouping = agg.grouping();
+
+ // If any join key in joinKeys refers to an aggregate function field, dpp cannot succeed.
+ for (int k : joinKeys) {
+ if (k >= grouping.length) {
+ return rel;
}
}
- }
- CatalogTable catalogTable = table.contextResolvedTable().getResolvedTable();
- if (catalogTable.isPartitioned()) {
- dimSideFactors.hasPartitionedScan = true;
- return;
- }
- // To ensure there is only one source on the dim side.
- dimSideFactors.setTables(table.contextResolvedTable());
- } else if (rel instanceof HepRelVertex) {
- visitDimSide(((HepRelVertex) rel).getCurrentRel(), dimSideFactors);
- } else if (rel instanceof Exchange || rel instanceof Project) {
- visitDimSide(rel.getInput(0), dimSideFactors);
- } else if (rel instanceof Calc) {
- RexProgram origProgram = ((Calc) rel).getProgram();
- if (origProgram.getCondition() != null
- && isSuitableFilter(origProgram.expandLocalRef(origProgram.getCondition()))) {
- dimSideFactors.hasFilter = true;
+ RelNode convertedRel =
+ convertDppFactSide(
+ input,
+ ImmutableIntList.copyOf(
+ joinKeys.stream()
+ .map(joinKey -> agg.grouping()[joinKey])
+ .collect(Collectors.toList())),
+ dimSide,
+ dimSideJoinKey);
+ return agg.copy(agg.getTraitSet(), Collections.singletonList(convertedRel));
+ } else {
+ // TODO In the future, we need to support more operators to enrich the matchable
+ // dpp patterns.
}
- visitDimSide(rel.getInput(0), dimSideFactors);
- } else if (rel instanceof Filter) {
- if (isSuitableFilter(((Filter) rel).getCondition())) {
- dimSideFactors.hasFilter = true;
+
+ return rel;
+ }
+
+ private static List<String> getSuitableDynamicFilteringFieldsInFactSide(
+ DynamicTableSource tableSource, List<String> candidateFields) {
+ List<String> acceptedFilterFields =
+ ((SupportsDynamicFiltering) tableSource).listAcceptedFilterFields();
+ if (acceptedFilterFields == null || acceptedFilterFields.isEmpty()) {
+ return new ArrayList<>();
}
- visitDimSide(rel.getInput(0), dimSideFactors);
- } else if (rel instanceof Join) {
- Join join = (Join) rel;
- visitDimSide(join.getLeft(), dimSideFactors);
- visitDimSide(join.getRight(), dimSideFactors);
- } else if (rel instanceof BatchPhysicalGroupAggregateBase) {
- visitDimSide(((BatchPhysicalGroupAggregateBase) rel).getInput(), dimSideFactors);
- } else if (rel instanceof Union) {
- Union union = (Union) rel;
- for (RelNode input : union.getInputs()) {
- visitDimSide(input, dimSideFactors);
+
+ List<String> suitableFields = new ArrayList<>();
+ // If a candidateField is not in acceptedFilterFields, the dpp rule will not be
+ // matched, because we cannot prune any partitions according to filter fields that
+ // are not accepted by the partition table source.
+ for (String candidateField : candidateFields) {
+ if (acceptedFilterFields.contains(candidateField)) {
+ suitableFields.add(candidateField);
+ }
}
+
+ return suitableFields;
}
- }
- /**
- * Not all filter condition suitable for using to filter partitions by dynamic partition pruning
- * rules. For example, NOT NULL can only filter one default partition which have a small impact
- * on filtering data.
- */
- private static boolean isSuitableFilter(RexNode filterCondition) {
- switch (filterCondition.getKind()) {
- case AND:
- List<RexNode> conjunctions = RelOptUtil.conjunctions(filterCondition);
- return isSuitableFilter(conjunctions.get(0))
- || isSuitableFilter(conjunctions.get(1));
- case OR:
- List<RexNode> disjunctions = RelOptUtil.disjunctions(filterCondition);
- return isSuitableFilter(disjunctions.get(0))
- && isSuitableFilter(disjunctions.get(1));
- case NOT:
- return isSuitableFilter(((RexCall) filterCondition).operands.get(0));
- case EQUALS:
- case GREATER_THAN:
- case GREATER_THAN_OR_EQUAL:
- case LESS_THAN:
- case LESS_THAN_OR_EQUAL:
- case NOT_EQUALS:
- case IN:
- case LIKE:
- case CONTAINS:
- case SEARCH:
- case IS_FALSE:
- case IS_NOT_FALSE:
- case IS_NOT_TRUE:
- case IS_TRUE:
- // TODO adding more suitable filters which can filter enough partitions after using
- // this filter in dynamic partition pruning.
- return true;
- default:
- return false;
+ private static BatchPhysicalDynamicFilteringDataCollector createDynamicFilteringConnector(
+ RelNode dimSide, List<Integer> dynamicFilteringFieldIndices) {
+ final RelDataType outputType =
+ ((FlinkTypeFactory) dimSide.getCluster().getTypeFactory())
+ .projectStructType(
+ dimSide.getRowType(),
+ dynamicFilteringFieldIndices.stream()
+ .mapToInt(i -> i)
+ .toArray());
+ return new BatchPhysicalDynamicFilteringDataCollector(
+ dimSide.getCluster(),
+ dimSide.getTraitSet(),
+ ignoreExchange(dimSide),
+ outputType,
+ dynamicFilteringFieldIndices.stream().mapToInt(i -> i).toArray());
}
- }
- /** Returns true if the source is FLIP-27 source, else false. */
- private static boolean isNewSource(ScanTableSource scanTableSource) {
- ScanTableSource.ScanRuntimeProvider provider =
- scanTableSource.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
- if (provider instanceof SourceProvider) {
- return true;
- } else if (provider instanceof TransformationScanProvider) {
- Transformation<?> transformation =
- ((TransformationScanProvider) provider)
- .createTransformation(name -> Optional.empty());
- return transformation instanceof SourceTransformation;
- } else if (provider instanceof DataStreamScanProvider) {
- // Suppose DataStreamScanProvider of sources that support dynamic filtering will use new
- // Source. It's not reliable and should be checked.
- // TODO FLINK-28864 check if the source used by the DataStreamScanProvider is actually a
- // new source.
- // This situation will not generate wrong result because it's handled when translating
- // BatchTableSourceScan. The only effect is the physical plan and the exec node plan
- // have DPP nodes, but they do not work in runtime.
- return true;
+ private static RelNode ignoreExchange(RelNode dimSide) {
+ if (dimSide instanceof Exchange) {
+ return dimSide.getInput(0);
+ } else {
+ return dimSide;
+ }
}
- // TODO supports more
- return false;
- }
- private static ImmutableIntList getInputIndices(
- List<RexNode> projects, ImmutableIntList joinKeys) {
- List<Integer> indices = new ArrayList<>();
- for (int k : joinKeys) {
- RexNode rexNode = projects.get(k);
- if (rexNode instanceof RexInputRef) {
- indices.add(((RexInputRef) rexNode).getIndex());
+ /** Returns true if the source is FLIP-27 source, else false. */
+ private static boolean isNewSource(ScanTableSource scanTableSource) {
+ ScanTableSource.ScanRuntimeProvider provider =
+ scanTableSource.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
+ if (provider instanceof SourceProvider) {
+ return true;
+ } else if (provider instanceof TransformationScanProvider) {
+ Transformation<?> transformation =
+ ((TransformationScanProvider) provider)
+ .createTransformation(name -> Optional.empty());
+ return transformation instanceof SourceTransformation;
+ } else if (provider instanceof DataStreamScanProvider) {
+ // Suppose DataStreamScanProvider of sources that support dynamic filtering will use
+ // new Source. It's not reliable and should be checked.
+ // TODO FLINK-28864 check if the source used by the DataStreamScanProvider is
+ // actually a new source. This situation will not generate wrong result because it's
+ // handled when translating BatchTableSourceScan. The only effect is the physical
+ // plan and the exec node plan have DPP nodes, but they do not work in runtime.
+ return true;
}
+ // TODO supports more
+ return false;
}
- return ImmutableIntList.copyOf(indices);
- }
- private static ImmutableIntList getInputIndices(
- Join join, ImmutableIntList joinKeys, boolean isLeft) {
- List<Integer> indices = new ArrayList<>();
- RelNode left = join.getLeft();
- int leftSize = left.getRowType().getFieldCount();
- for (int k : joinKeys) {
- if (isLeft) {
- if (k < leftSize) {
- indices.add(k);
- }
- } else {
- if (k >= leftSize) {
- indices.add(k - leftSize);
+ private static ImmutableIntList getInputIndices(
+ List<RexNode> projects, ImmutableIntList joinKeys) {
+ List<Integer> indices = new ArrayList<>();
+ for (int k : joinKeys) {
+ RexNode rexNode = projects.get(k);
+ if (rexNode instanceof RexInputRef) {
+ indices.add(((RexInputRef) rexNode).getIndex());
}
}
+ return ImmutableIntList.copyOf(indices);
}
- return ImmutableIntList.copyOf(indices);
- }
- private static class DppDimSideFactors {
- private boolean hasFilter;
- private boolean hasPartitionedScan;
- private final List<ContextResolvedTable> tables = new ArrayList<>();
-
- public void setTables(ContextResolvedTable catalogTable) {
- if (tables.size() == 0) {
- tables.add(catalogTable);
- } else {
- for (ContextResolvedTable thisTable : new ArrayList<>(tables)) {
- if (!thisTable.getIdentifier().equals(catalogTable.getIdentifier())) {
- tables.add(catalogTable);
+ private static ImmutableIntList getInputIndices(
+ Join join, ImmutableIntList joinKeys, boolean isLeft) {
+ List<Integer> indices = new ArrayList<>();
+ RelNode left = join.getLeft();
+ int leftSize = left.getRowType().getFieldCount();
+ for (int k : joinKeys) {
+ if (isLeft) {
+ if (k < leftSize) {
+ indices.add(k);
+ }
+ } else {
+ if (k >= leftSize) {
+ indices.add(k - leftSize);
}
}
}
+ return ImmutableIntList.copyOf(indices);
}
-
- public boolean isDimSide() {
- return hasFilter && !hasPartitionedScan && tables.size() == 1;
- }
- }
-
- /** This class is used to remember fact side messages while recurring in fact side. */
- private static class DppFactSideFactors {
- // If join key is not changed in fact side, this value is always true.
- private boolean isChanged;
}
}
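As a closing worked example, the two getInputIndices overloads above push join keys through a join: a key referring to a left-input field keeps its index, while a key referring to a right-input field is shifted down by the left input's field count. In plain Java, with made-up values for illustration:

    import java.util.ArrayList;
    import java.util.List;

    int leftSize = 5;                       // field count of the join's left input
    List<Integer> joinKeys = List.of(2, 6); // key positions on the join's output
    List<Integer> leftKeys = new ArrayList<>();
    List<Integer> rightKeys = new ArrayList<>();
    for (int k : joinKeys) {
        if (k < leftSize) {
            leftKeys.add(k);             // stays a left-input index  -> [2]
        } else {
            rightKeys.add(k - leftSize); // shifted into right-input space -> [1]
        }
    }

Each side of the join is then converted with only the keys it can actually resolve, which is why convertDppFactSide recurses into both inputs with different index lists.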