Skip to content

Commit 906183a

Browse files
committed
[CALCITE-2914] Add a new statistic provider, to improve how LatticeSuggester deduces foreign keys
Statistic provider now generates SQL statements to look at a join condition and figure out whether either side is a unique key, and whether the other side is a foreign key (i.e. does an anti-join to verify referential integrity). Create new package org.apache.calcite.statistic, and move some existing classes such as MapSqlStatisticProvider into it. In JDBC adapter, when generating SQL for JDBC tables, use the foreign catalog, schema and table name. In Frameworks, use a query provider with a 30 minute, 1,000 element cache, rather than map provider as default provider. In LatticeSuggesterTest we continue to use a MapSqlStatisticProvider, for performance reasons. Fix deprecated calls to AggregateCall.create added in [CALCITE-1172]. Close #1141
1 parent 6d57ff9 commit 906183a

19 files changed

+905
-159
lines changed

core/src/main/java/org/apache/calcite/adapter/jdbc/JdbcRules.java

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import org.apache.calcite.linq4j.Queryable;
2020
import org.apache.calcite.linq4j.tree.Expression;
21+
import org.apache.calcite.plan.Contexts;
2122
import org.apache.calcite.plan.Convention;
2223
import org.apache.calcite.plan.RelOptCluster;
2324
import org.apache.calcite.plan.RelOptCost;
@@ -60,12 +61,15 @@
6061
import org.apache.calcite.rex.RexNode;
6162
import org.apache.calcite.rex.RexOver;
6263
import org.apache.calcite.rex.RexProgram;
64+
import org.apache.calcite.rex.RexUtil;
6365
import org.apache.calcite.rex.RexVisitorImpl;
6466
import org.apache.calcite.schema.ModifiableTable;
6567
import org.apache.calcite.sql.SqlAggFunction;
6668
import org.apache.calcite.sql.SqlDialect;
6769
import org.apache.calcite.sql.SqlFunction;
6870
import org.apache.calcite.sql.SqlOperator;
71+
import org.apache.calcite.sql.validate.SqlValidatorUtil;
72+
import org.apache.calcite.tools.RelBuilder;
6973
import org.apache.calcite.tools.RelBuilderFactory;
7074
import org.apache.calcite.util.ImmutableBitSet;
7175
import org.apache.calcite.util.Util;
@@ -91,6 +95,126 @@ private JdbcRules() {
9195

9296
protected static final Logger LOGGER = CalciteTrace.getPlannerTracer();
9397

98+
static final RelFactories.ProjectFactory PROJECT_FACTORY =
99+
(input, projects, fieldNames) -> {
100+
final RelOptCluster cluster = input.getCluster();
101+
final RelDataType rowType =
102+
RexUtil.createStructType(cluster.getTypeFactory(), projects,
103+
fieldNames, SqlValidatorUtil.F_SUGGESTER);
104+
return new JdbcProject(cluster, input.getTraitSet(), input, projects,
105+
rowType);
106+
};
107+
108+
static final RelFactories.FilterFactory FILTER_FACTORY =
109+
(input, condition) -> new JdbcRules.JdbcFilter(input.getCluster(),
110+
input.getTraitSet(), input, condition);
111+
112+
static final RelFactories.JoinFactory JOIN_FACTORY =
113+
(left, right, condition, variablesSet, joinType, semiJoinDone) -> {
114+
final RelOptCluster cluster = left.getCluster();
115+
final RelTraitSet traitSet = cluster.traitSetOf(left.getConvention());
116+
try {
117+
return new JdbcJoin(cluster, traitSet, left, right, condition,
118+
variablesSet, joinType);
119+
} catch (InvalidRelException e) {
120+
throw new AssertionError(e);
121+
}
122+
};
123+
124+
static final RelFactories.CorrelateFactory CORRELATE_FACTORY =
125+
(left, right, correlationId, requiredColumns, joinType) -> {
126+
throw new UnsupportedOperationException("JdbcCorrelate");
127+
};
128+
129+
public static final RelFactories.SemiJoinFactory SEMI_JOIN_FACTORY =
130+
(left, right, condition) -> {
131+
throw new UnsupportedOperationException("JdbcSemiJoin");
132+
};
133+
134+
public static final RelFactories.SortFactory SORT_FACTORY =
135+
(input, collation, offset, fetch) -> {
136+
throw new UnsupportedOperationException("JdbcSort");
137+
};
138+
139+
public static final RelFactories.ExchangeFactory EXCHANGE_FACTORY =
140+
(input, distribution) -> {
141+
throw new UnsupportedOperationException("JdbcExchange");
142+
};
143+
144+
public static final RelFactories.SortExchangeFactory SORT_EXCHANGE_FACTORY =
145+
(input, distribution, collation) -> {
146+
throw new UnsupportedOperationException("JdbcSortExchange");
147+
};
148+
149+
public static final RelFactories.AggregateFactory AGGREGATE_FACTORY =
150+
(input, indicator, groupSet, groupSets, aggCalls) -> {
151+
final RelOptCluster cluster = input.getCluster();
152+
final RelTraitSet traitSet = cluster.traitSetOf(input.getConvention());
153+
try {
154+
return new JdbcAggregate(cluster, traitSet, input, false, groupSet,
155+
groupSets, aggCalls);
156+
} catch (InvalidRelException e) {
157+
throw new AssertionError(e);
158+
}
159+
};
160+
161+
public static final RelFactories.MatchFactory MATCH_FACTORY =
162+
(input, pattern, rowType, strictStart, strictEnd, patternDefinitions,
163+
measures, after, subsets, allRows, partitionKeys, orderKeys,
164+
interval) -> {
165+
throw new UnsupportedOperationException("JdbcMatch");
166+
};
167+
168+
public static final RelFactories.SetOpFactory SET_OP_FACTORY =
169+
(kind, inputs, all) -> {
170+
RelNode input = inputs.get(0);
171+
RelOptCluster cluster = input.getCluster();
172+
final RelTraitSet traitSet = cluster.traitSetOf(input.getConvention());
173+
switch (kind) {
174+
case UNION:
175+
return new JdbcUnion(cluster, traitSet, inputs, all);
176+
case INTERSECT:
177+
return new JdbcIntersect(cluster, traitSet, inputs, all);
178+
case EXCEPT:
179+
return new JdbcMinus(cluster, traitSet, inputs, all);
180+
default:
181+
throw new AssertionError("unknown: " + kind);
182+
}
183+
};
184+
185+
public static final RelFactories.ValuesFactory VALUES_FACTORY =
186+
(cluster, rowType, tuples) -> {
187+
throw new UnsupportedOperationException();
188+
};
189+
190+
public static final RelFactories.TableScanFactory TABLE_SCAN_FACTORY =
191+
(cluster, table) -> {
192+
throw new UnsupportedOperationException();
193+
};
194+
195+
public static final RelFactories.SnapshotFactory SNAPSHOT_FACTORY =
196+
(input, period) -> {
197+
throw new UnsupportedOperationException();
198+
};
199+
200+
/** A {@link RelBuilderFactory} that creates a {@link RelBuilder} that will
201+
* create JDBC relational expressions for everything. */
202+
public static final RelBuilderFactory JDBC_BUILDER =
203+
RelBuilder.proto(
204+
Contexts.of(PROJECT_FACTORY,
205+
FILTER_FACTORY,
206+
JOIN_FACTORY,
207+
SEMI_JOIN_FACTORY,
208+
SORT_FACTORY,
209+
EXCHANGE_FACTORY,
210+
SORT_EXCHANGE_FACTORY,
211+
AGGREGATE_FACTORY,
212+
MATCH_FACTORY,
213+
SET_OP_FACTORY,
214+
VALUES_FACTORY,
215+
TABLE_SCAN_FACTORY,
216+
SNAPSHOT_FACTORY));
217+
94218
public static List<RelOptRule> rules(JdbcConvention out) {
95219
return rules(out, RelFactories.LOGICAL_BUILDER);
96220
}

core/src/main/java/org/apache/calcite/adapter/jdbc/JdbcTable.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,19 +75,20 @@
7575
public class JdbcTable extends AbstractQueryableTable
7676
implements TranslatableTable, ScannableTable, ModifiableTable {
7777
private RelProtoDataType protoRowType;
78-
private final JdbcSchema jdbcSchema;
79-
private final String jdbcCatalogName;
80-
private final String jdbcSchemaName;
81-
private final String jdbcTableName;
82-
private final Schema.TableType jdbcTableType;
78+
public final JdbcSchema jdbcSchema;
79+
public final String jdbcCatalogName;
80+
public final String jdbcSchemaName;
81+
public final String jdbcTableName;
82+
public final Schema.TableType jdbcTableType;
8383

8484
JdbcTable(JdbcSchema jdbcSchema, String jdbcCatalogName,
85-
String jdbcSchemaName, String tableName, Schema.TableType jdbcTableType) {
85+
String jdbcSchemaName, String jdbcTableName,
86+
Schema.TableType jdbcTableType) {
8687
super(Object[].class);
87-
this.jdbcSchema = jdbcSchema;
88+
this.jdbcSchema = Objects.requireNonNull(jdbcSchema);
8889
this.jdbcCatalogName = jdbcCatalogName;
8990
this.jdbcSchemaName = jdbcSchemaName;
90-
this.jdbcTableName = tableName;
91+
this.jdbcTableName = Objects.requireNonNull(jdbcTableName);
9192
this.jdbcTableType = Objects.requireNonNull(jdbcTableType);
9293
}
9394

@@ -142,16 +143,18 @@ SqlString generateSql() {
142143
return writer.toSqlString();
143144
}
144145

145-
SqlIdentifier tableName() {
146-
final List<String> strings = new ArrayList<>();
146+
/** Returns the table name, qualified with catalog and schema name if
147+
* applicable, as a parse tree node ({@link SqlIdentifier}). */
148+
public SqlIdentifier tableName() {
149+
final List<String> names = new ArrayList<>(3);
147150
if (jdbcSchema.catalog != null) {
148-
strings.add(jdbcSchema.catalog);
151+
names.add(jdbcSchema.catalog);
149152
}
150153
if (jdbcSchema.schema != null) {
151-
strings.add(jdbcSchema.schema);
154+
names.add(jdbcSchema.schema);
152155
}
153-
strings.add(jdbcTableName);
154-
return new SqlIdentifier(strings, SqlParserPos.ZERO);
156+
names.add(jdbcTableName);
157+
return new SqlIdentifier(names, SqlParserPos.ZERO);
155158
}
156159

157160
public RelNode toRel(RelOptTable.ToRelContext context,

core/src/main/java/org/apache/calcite/materialize/Lattice.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.calcite.sql.SqlUtil;
4848
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
4949
import org.apache.calcite.sql.validate.SqlValidatorUtil;
50+
import org.apache.calcite.statistic.MapSqlStatisticProvider;
5051
import org.apache.calcite.util.ImmutableBitSet;
5152
import org.apache.calcite.util.Litmus;
5253
import org.apache.calcite.util.Pair;

core/src/main/java/org/apache/calcite/materialize/MapSqlStatisticProvider.java

Lines changed: 0 additions & 88 deletions
This file was deleted.

core/src/main/java/org/apache/calcite/materialize/SqlStatisticProvider.java

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,48 @@
1616
*/
1717
package org.apache.calcite.materialize;
1818

19+
import org.apache.calcite.plan.RelOptTable;
20+
1921
import java.util.List;
2022

2123
/**
22-
* Estimates row counts for tables and columns.
24+
* Estimates row counts for tables and columns, and whether combinations of
25+
* columns form primary/unique and foreign keys.
2326
*
2427
* <p>Unlike {@link LatticeStatisticProvider}, works on raw tables and columns
2528
* and does not need a {@link Lattice}.
29+
*
30+
* <p>It uses {@link org.apache.calcite.plan.RelOptTable} because that contains
31+
* enough information to generate and execute SQL, while not being tied to a
32+
* lattice.
33+
*
34+
* <p>The main implementation,
35+
* {@link org.apache.calcite.statistic.QuerySqlStatisticProvider}, executes
36+
* queries on a populated database. Implementations that use database statistics
37+
* (from {@code ANALYZE TABLE}, etc.) and catalog information (e.g. primary and
38+
* foreign key constraints) would also be possible.
2639
*/
2740
public interface SqlStatisticProvider {
28-
double tableCardinality(List<String> qualifiedTableName);
41+
/** Returns an estimate of the number of rows in {@code table}. */
42+
double tableCardinality(RelOptTable table);
43+
44+
/** Returns whether a join is a foreign key; that is, whether every row in
45+
* the referencing table is matched by at least one row in the referenced
46+
* table.
47+
*
48+
* <p>For example, {@code isForeignKey(EMP, [DEPTNO], DEPT, [DEPTNO])}
49+
* returns true.
50+
*
51+
* <p>To change "at least one" to "exactly one", you also need to call
52+
* {@link #isKey}. */
53+
boolean isForeignKey(RelOptTable fromTable, List<Integer> fromColumns,
54+
RelOptTable toTable, List<Integer> toColumns);
55+
56+
/** Returns whether a collection of columns is a unique (or primary) key.
57+
*
58+
* <p>For example, {@code isKey(DEPT, [DEPTNO])} returns true;
59+
* {@code isKey(EMP, [DEPTNO])} returns false. */
60+
boolean isKey(RelOptTable table, List<Integer> columns);
2961
}
3062

3163
// End SqlStatisticProvider.java

0 commit comments

Comments
 (0)