/
SelectStmt.java
1846 lines (1699 loc) · 77.4 KB
/
SelectStmt.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.analysis;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import java.util.stream.Collectors;
import org.apache.iceberg.Table;
import org.apache.impala.analysis.Path.PathType;
import org.apache.impala.authorization.Privilege;
import org.apache.impala.catalog.ArrayType;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.DatabaseNotFoundException;
import org.apache.impala.catalog.FeIcebergTable;
import org.apache.impala.catalog.FeIcebergTable.Utils;
import org.apache.impala.catalog.FeKuduTable;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.FeView;
import org.apache.impala.catalog.KuduColumn;
import org.apache.impala.catalog.MapType;
import org.apache.impala.catalog.StructField;
import org.apache.impala.catalog.StructType;
import org.apache.impala.catalog.TableLoadingException;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.ColumnAliasGenerator;
import org.apache.impala.common.Pair;
import org.apache.impala.common.TableAliasGenerator;
import org.apache.impala.common.TreeNode;
import org.apache.impala.rewrite.ExprRewriter;
import org.apache.impala.service.BackendConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
/**
* Representation of a single select block, including GROUP BY, ORDER BY and HAVING
* clauses.
*/
public class SelectStmt extends QueryStmt {
private final static Logger LOG = LoggerFactory.getLogger(SelectStmt.class);
/////////////////////////////////////////
// BEGIN: Members that need to be reset()
protected SelectList selectList_;
protected final List<String> colLabels_; // lower case column labels
protected final FromClause fromClause_;
protected Expr whereClause_;
// Grouping expressions for this select block, including expressions from the original
// query statement and additional expressions that may be added during rewriting.
protected List<Expr> groupingExprs_;
// The original GROUP BY clause with information about grouping sets, etc.
// If there was no original GROUP BY clause, but grouping exprs are added during
// rewriting, this is initialized as a placeholder empty group by list.
// Non-null iff groupingExprs_ is non-null.
private GroupByClause groupByClause_;
protected Expr havingClause_; // original having clause
// havingClause with aliases and agg output resolved
private Expr havingPred_;
// set if we have any kind of aggregation operation, include SELECT DISTINCT
private MultiAggregateInfo multiAggInfo_;
// set if we have AnalyticExprs in the select list/order by clause
private AnalyticInfo analyticInfo_;
// substitutes all exprs in this select block to reference base tables
// directly
private ExprSubstitutionMap baseTblSmap_ = new ExprSubstitutionMap();
// END: Members that need to be reset()
/////////////////////////////////////////
// SQL string of this SelectStmt before inline-view expression substitution.
// Set in analyze().
protected String sqlString_;
SelectStmt(SelectList selectList,
FromClause fromClause,
Expr wherePredicate, GroupByClause groupByClause,
Expr havingPredicate, List<OrderByElement> orderByElements,
LimitElement limitElement) {
super(orderByElements, limitElement);
selectList_ = selectList;
if (fromClause == null) {
fromClause_ = new FromClause();
} else {
fromClause_ = fromClause;
}
whereClause_ = wherePredicate;
groupByClause_ = groupByClause;
if (groupByClause != null) {
groupingExprs_ = Expr.cloneList(groupByClause.getOrigGroupingExprs());
} else {
groupingExprs_ = null;
}
havingClause_ = havingPredicate;
colLabels_ = new ArrayList<>();
havingPred_ = null;
multiAggInfo_ = null;
sortInfo_ = null;
}
/**
* @return the original select list items from the query
*/
public SelectList getSelectList() { return selectList_; }
/**
* @return the HAVING clause post-analysis and with aliases resolved
*/
public Expr getHavingPred() { return havingPred_; }
@Override
public List<String> getColLabels() { return colLabels_; }
public List<TableRef> getTableRefs() { return fromClause_.getTableRefs(); }
public boolean hasWhereClause() { return whereClause_ != null; }
public boolean hasGroupByClause() { return groupingExprs_ != null; }
public Expr getWhereClause() { return whereClause_; }
public void setWhereClause(Expr whereClause) { whereClause_ = whereClause; }
public MultiAggregateInfo getMultiAggInfo() { return multiAggInfo_; }
public boolean hasMultiAggInfo() { return multiAggInfo_ != null; }
public AnalyticInfo getAnalyticInfo() { return analyticInfo_; }
public boolean hasAnalyticInfo() { return analyticInfo_ != null; }
public boolean hasHavingClause() { return havingClause_ != null; }
public ExprSubstitutionMap getBaseTblSmap() { return baseTblSmap_; }
public void addToFromClause(TableRef ref) { fromClause_.add(ref); }
/**
* A simple limit statement has a limit but no order-by,
* group-by, aggregates or analytic functions. Joins are
* allowed if one of the table refs has a limit_to_sample
* hint. The statement can have a WHERE clause if it has an
* always_true hint.
* This method does not explicitly check for subqueries. If the
* subquery occurs in the WHERE, the initial check for simple
* limit may succeed. Subsequently, in the planning process
* the statement rewriter may transform it to a join or an
* equivalent plan and analyze will be called again and this time
* it may or may not satisfy the simple limit criteria.
* Note that FROM clause subquery is generally ok since it is
* a table ref.
*/
public Pair<Boolean, Long> checkSimpleLimitStmt() {
if (hasSimpleLimitEligibleTableRefs()
&& !hasGroupByClause() && !hasOrderByClause()
&& !hasMultiAggInfo() && !hasAnalyticInfo()) {
if (hasWhereClause() && !Expr.IS_ALWAYS_TRUE_PREDICATE.apply(getWhereClause())) {
return null;
}
if (hasLimit()) {
return new Pair<>(Boolean.valueOf(true), getLimit());
} else {
// even if this SELECT statement does not have a LIMIT, it is a
// simple select which may be an inline view and eligible for a
// limit pushdown from an outer block, so we return a non-null value
return new Pair<>(Boolean.valueOf(false), null);
}
}
return null;
}
/**
* Append additional grouping expressions to the select list. Used by StmtRewriter.
*/
protected void addGroupingExprs(List<Expr> addtlGroupingExprs) {
if (groupingExprs_ == null) {
groupByClause_ = new GroupByClause(
Collections.emptyList(), GroupByClause.GroupingSetsType.NONE);
groupingExprs_ = new ArrayList<>();
}
groupingExprs_.addAll(addtlGroupingExprs);
}
private boolean hasSimpleLimitEligibleTableRefs() {
// for single table query blocks, limit pushdown can be considered
// even if no table hint is present
if (getTableRefs().size() == 1) return true;
// if there are multiple table refs, at least 1 should have the hint
// for limit to sample
for (TableRef ref : getTableRefs()) {
if (ref.hasConvertLimitToSampleHint()) return true;
}
return false;
}
/**
* Remove the group by clause. Used by StmtRewriter. This changes the semantics
* of this statement and should only be called when the query is being rewritten
* in a way such that the GROUP BY is *not* required for correctness.
*/
protected void removeGroupBy() {
groupByClause_ = null;
groupingExprs_ = null;
}
// Column alias generator used during query rewriting.
private ColumnAliasGenerator columnAliasGenerator_ = null;
public ColumnAliasGenerator getColumnAliasGenerator() {
if (columnAliasGenerator_ == null) {
columnAliasGenerator_ = new ColumnAliasGenerator(colLabels_, null);
}
return columnAliasGenerator_;
}
// Table alias generator used during query rewriting.
private TableAliasGenerator tableAliasGenerator_ = null;
public TableAliasGenerator getTableAliasGenerator() {
if (tableAliasGenerator_ == null) {
tableAliasGenerator_ = new TableAliasGenerator(analyzer_, null);
}
return tableAliasGenerator_;
}
@Override
public boolean resolveTableMask(Analyzer analyzer) throws AnalysisException {
boolean hasChanges = super.resolveTableMask(analyzer);
// Recurse in all places that could have subqueries. After resolveTableMask() is done
// on the whole AST, SlotRefs referencing the original table refs will be reset and
// re-analyzed to reference the table masking views. So we don't need to deal with
// SlotRefs here.
hasChanges |= fromClause_.resolveTableMask(analyzer);
for (SelectListItem item : selectList_.getItems()) {
if (item.isStar()) continue;
hasChanges |= item.getExpr().resolveTableMask(analyzer);
}
if (whereClause_ != null) hasChanges |= whereClause_.resolveTableMask(analyzer);
if (havingClause_ != null) hasChanges |= havingClause_.resolveTableMask(analyzer);
return hasChanges;
}
/**
* Creates resultExprs and baseTblResultExprs.
*/
@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
if (isAnalyzed()) return;
super.analyze(analyzer);
new SelectAnalyzer(analyzer).analyze();
this.optimizePlainCountStarQueryForIcebergTable();
}
/**
* Algorithm class for the SELECT statement analyzer. Holds
* the analyzer and intermediate state.
*/
private class SelectAnalyzer {
private final Analyzer analyzer_;
private List<Expr> groupingExprsCopy_;
private List<FunctionCallExpr> aggExprs_;
private ExprSubstitutionMap countAllMap_;
private class StarExpandedPathInfo {
public StarExpandedPathInfo(Path expandedPath, Path originalRootPath) {
expandedPath_ = expandedPath;
originalRootPath_ = originalRootPath;
}
public Path getExpandedPath() { return expandedPath_; }
public boolean shouldRegisterForColumnMasking() {
// Empty matched types means this is expanded from star of a catalog table. For
// star of complex types, e.g. my_struct.*, my_array.*, my_map.*, the matched
// types will have the complex type so it's not empty.
// TODO: IMPALA-11712: We should sort out column masking and complex types. The
// above comment may not always be true: in the query
// select a.* from mix_struct_array t, t.struct_in_arr a;
// getMatchedTypes() returns an empty list for the star path even though it is not
// from a catalog table.
// We should also find out whether we can determine from the expanded path alone
// (and not from the path of the star item) whether we need to register it for
// column masking, for example by checking if it is within a complex type.
return originalRootPath_.getMatchedTypes().isEmpty();
}
// The path expanded from a star select list item.
private final Path expandedPath_;
// The original path of the star select list item from which 'expandedPath_' was
// expanded.
// Can be the path of a table, a struct or a collection.
private final Path originalRootPath_;
}
// A map from star 'SelectListItem's to the paths to which they are expanded.
private final Map<SelectListItem, List<StarExpandedPathInfo>> starExpandedPaths_
= new HashMap<>();
private SelectAnalyzer(Analyzer analyzer) {
this.analyzer_ = analyzer;
}
private void analyze() throws AnalysisException {
// Start out with table refs to establish aliases.
fromClause_.analyze(analyzer_);
// Register struct paths (including those expanded from star expressions) before
// analyzeSelectClause() to guarantee tuple memory sharing between structs and
// struct members. See registerStructSlotRefPathsWithAnalyzer().
collectStarExpandedPaths();
registerStructSlotRefPathsWithAnalyzer();
analyzeSelectClause();
verifyResultExprs();
registerViewColumnPrivileges();
analyzeWhereClause();
createSortInfo(analyzer_);
setZippingUnnestSlotRefsFromViews();
// Analyze aggregation-relevant components of the select block (Group By
// clause, select list, Order By clause), substitute AVG with SUM/COUNT,
// create the AggregationInfo, including the agg output tuple, and transform
// all post-agg exprs given AggregationInfo's smap.
analyzeHavingClause();
if (checkForAggregates()) {
verifyAggSemantics();
analyzeGroupingExprs();
collectAggExprs();
buildAggregateExprs();
buildResultExprs();
verifyAggregation();
}
createAnalyticInfo();
if (evaluateOrderBy_) createSortTupleInfo(analyzer_);
// Remember the SQL string before inline-view expression substitution.
sqlString_ = toSql();
if (origSqlString_ == null) origSqlString_ = sqlString_;
resolveInlineViewRefs();
// If this block's select-project-join portion returns an empty result set and the
// block has no aggregation, then mark this block as returning an empty result set.
if (analyzer_.hasEmptySpjResultSet() && multiAggInfo_ == null) {
analyzer_.setHasEmptyResultSet();
}
buildColumnLineageGraph();
analyzer_.setSimpleLimitStatus(checkSimpleLimitStmt());
}
/** Ensure that embedded (struct member) expressions share the tuple memory of their
* enclosing struct expressions by registering struct paths before analysis of select
* list items. Struct paths are registered in order of increasing number of path
* elements - this guarantees that when a struct member path is registered, its
* enclosing struct has already been registered, so Analyzer.registerSlotRef() can
* return the SlotDescriptor already created for the struct member within the struct,
* instead of creating a new SlotDescriptor for the struct member outside of the
* struct.
*
* Note that struct members can themselves be structs: this is the reason that
* ordering by increasing path element number (increasing embedding depth) is
* necessary and simply registering struct paths before other paths is not enough.
*/
private void registerStructSlotRefPathsWithAnalyzer() throws AnalysisException {
Stream<Path> nonStarPaths = collectNonStarPaths();
Stream<Path> starExpandedPaths = starExpandedPaths_.values().stream()
// Get a flat list of paths (and booleans) belonging to all star items.
.flatMap(pathList -> pathList.stream())
// Discard the booleans and keep only the actual paths.
.map((StarExpandedPathInfo pathInfo) -> pathInfo.getExpandedPath());
Stream<Path> allPaths = Stream.concat(nonStarPaths, starExpandedPaths);
List<Path> structPaths = allPaths
.filter(path -> path.destType().isStructType())
.collect(Collectors.toList());
// Sort paths by length in ascending order so that structs that contain other
// structs come before their children.
Collections.sort(structPaths,
Comparator.<Path>comparingInt(path -> path.getMatchedTypes().size()));
for (Path p : structPaths) {
analyzer_.registerSlotRef(p);
}
}
private Stream<Path> collectNonStarPaths() {
Preconditions.checkNotNull(selectList_);
Stream<Expr> selectListExprs = selectList_.getItems().stream()
.filter(elem -> !elem.isStar())
.map(elem -> elem.getExpr());
Stream<Expr> nonSelectListExprs = collectExprsOutsideSelectList();
Stream<Expr> exprs = Stream.concat(selectListExprs, nonSelectListExprs);
// Use a LinkedHashSet for deterministic iteration order.
LinkedHashSet<SlotRef> slotRefs = new LinkedHashSet<>();
exprs.forEach(expr -> expr.collect(SlotRef.class, slotRefs));
return slotRefs.stream()
.map(this::slotRefToResolvedPath)
.filter(path -> path != null);
}
private Stream<Expr> collectExprsOutsideSelectList() {
Stream<Expr> res = Stream.empty();
if (whereClause_ != null) {
res = Stream.concat(res, Stream.of(whereClause_));
}
if (havingClause_ != null) {
res = Stream.concat(res, Stream.of(havingClause_));
}
if (groupingExprs_ != null) {
res = Stream.concat(res, groupingExprs_.stream());
}
if (sortInfo_ != null) {
res = Stream.concat(res, sortInfo_.getSortExprs().stream());
}
if (analyticInfo_ != null) {
res = Stream.concat(res,
analyticInfo_.getAnalyticExprs().stream()
.map(analyticExpr -> (Expr) analyticExpr));
}
return res;
}
private Path slotRefToResolvedPath(SlotRef slotRef) {
try {
Path resolvedPath = analyzer_.resolvePathWithMasking(slotRef.getRawPath(),
PathType.SLOT_REF);
return resolvedPath;
} catch (TableLoadingException e) {
// Should never happen because we only check registered table aliases.
Preconditions.checkState(false);
return null;
} catch (AnalysisException e) {
// Return null if analysis did not succeed. This means this path will not be
// registered here.
return null;
}
}
private void analyzeSelectClause() throws AnalysisException {
// Generate !empty() predicates to filter out empty collections.
// Skip this step when analyzing a WITH-clause because CollectionTableRefs
// do not register collection slots in their parent in that context
// (see CollectionTableRef.analyze()).
if (!analyzer_.hasWithClause()) registerIsNotEmptyPredicates();
// analyze plan hints from select list
selectList_.analyzePlanHints(analyzer_);
// populate resultExprs_, aliasSmap_, and colLabels_
// This additional map is used for performance reasons and not for finding
// ambiguous alias.
Map<String, Expr> existingAliasExprs = new LinkedHashMap<>();
for (int i = 0; i < selectList_.getItems().size(); ++i) {
SelectListItem item = selectList_.getItems().get(i);
if (item.isStar()) {
analyzeStarItem(item);
} else {
analyzeNonStarItem(item, existingAliasExprs, i);
}
}
if (LOG.isTraceEnabled()) {
LOG.trace("Analyzed select clause aliasSmap={}", aliasSmap_.debugString());
}
}
private void analyzeStarItem(SelectListItem item) throws AnalysisException {
Preconditions.checkState(item.isStar());
List<StarExpandedPathInfo> starExpandedPathInfos = starExpandedPaths_.get(item);
// If complex types are not expanded, a star item may expand to zero items, in which
// case starExpandedPaths_ does not have a value for it.
if (starExpandedPathInfos == null) {
Preconditions.checkState(
!analyzer_.getQueryCtx().client_request.query_options.expand_complex_types);
return;
}
for (StarExpandedPathInfo pathInfo : starExpandedPathInfos) {
addStarExpandedPathResultExpr(pathInfo);
}
}
private void analyzeNonStarItem(SelectListItem item,
Map<String, Expr> existingAliasExprs, int selectListPos)
throws AnalysisException {
// Analyze the resultExpr before generating a label to ensure enforcement
// of expr child and depth limits (toColumn() label may call toSql()).
item.getExpr().analyze(analyzer_);
// Check for scalar subquery types which are not supported
List<Subquery> subqueryExprs = new ArrayList<>();
item.getExpr().collect(Subquery.class, subqueryExprs);
for (Subquery s : subqueryExprs) {
Preconditions.checkState(s.getStatement() instanceof SelectStmt);
if (!s.returnsScalarColumn()) {
throw new AnalysisException("A non-scalar subquery is not supported in "
+ "the expression: " + item.getExpr().toSql());
}
if (s.getStatement().isRuntimeScalar()) {
throw new AnalysisException(
"A subquery which may return more than one row is not supported in "
+ "the expression: " + item.getExpr().toSql());
}
if (!((SelectStmt)s.getStatement()).returnsAtMostOneRow()) {
throw new AnalysisException("Only subqueries that are guaranteed to return "
+ "a single row are supported: " + item.getExpr().toSql());
}
}
resultExprs_.add(item.getExpr());
String label = item.toColumnLabel(selectListPos, analyzer_.useHiveColLabels());
SlotRef aliasRef = new SlotRef(label);
Expr existingAliasExpr = existingAliasExprs.get(label);
if (existingAliasExpr != null && !existingAliasExpr.equals(item.getExpr())) {
// If we have already seen this alias, it refers to more than one column and
// therefore is ambiguous.
ambiguousAliasList_.add(aliasRef);
} else {
existingAliasExprs.put(label, item.getExpr());
}
aliasSmap_.put(aliasRef, item.getExpr().clone());
colLabels_.add(label);
}
private void verifyResultExprs() throws AnalysisException {
// Star exprs only expand to the scalar-typed columns/fields, so
// the resultExprs_ could be empty.
if (resultExprs_.isEmpty()) {
throw new AnalysisException("The star exprs expanded to an empty select list " +
"because the referenced tables only have complex-typed columns.\n" +
"Star exprs only expand to scalar-typed columns because " +
"currently not all complex-typed exprs " +
"are supported in the select list.\n" +
"Affected select statement:\n" + toSql());
}
for (Expr expr: resultExprs_) {
if (selectList_.isDistinct() && expr.getType().isComplexType()) {
throw new AnalysisException("Complex types are not supported " +
"in SELECT DISTINCT clauses. Expr: '" + expr.toSql() + "', type: '"
+ expr.getType().toSql() + "'.");
}
if (expr.getType().isArrayType()) {
ArrayType arrayType = (ArrayType) expr.getType();
if (!arrayType.getItemType().isSupported()) {
throw new AnalysisException("Unsupported type '" +
expr.getType().toSql() + "' in '" + expr.toSql() + "'.");
}
} else if (expr.getType().isMapType()) {
MapType mapType = (MapType) expr.getType();
if (!mapType.getKeyType().isSupported()
|| !mapType.getValueType().isSupported()) {
throw new AnalysisException("Unsupported type '" +
expr.getType().toSql() + "' in '" + expr.toSql() + "'.");
}
}
if (!expr.getType().isSupported()) {
throw new AnalysisException("Unsupported type '"
+ expr.getType().toSql() + "' in '" + expr.toSql() + "'.");
}
}
if (TreeNode.contains(resultExprs_, AnalyticExpr.class)) {
if (fromClause_.isEmpty()) {
throw new AnalysisException("Analytic expressions require FROM clause.");
}
// do this here, not after analyzeAggregation(), otherwise the AnalyticExprs
// will get substituted away
if (selectList_.isDistinct()) {
throw new AnalysisException(
"cannot combine SELECT DISTINCT with analytic functions");
}
}
}
private void analyzeWhereClause() throws AnalysisException {
if (whereClause_ == null) return;
whereClause_.analyze(analyzer_);
if (whereClause_.contains(Expr.IS_AGGREGATE)) {
throw new AnalysisException(
"aggregate function not allowed in WHERE clause");
}
whereClause_.checkReturnsBool("WHERE clause", false);
Expr e = whereClause_.findFirstOf(AnalyticExpr.class);
if (e != null) {
throw new AnalysisException(
"WHERE clause must not contain analytic expressions: " + e.toSql());
}
verifyZippingUnnestSlots();
analyzer_.registerConjuncts(whereClause_, false);
}
/**
* Don't allow a WHERE conjunct on an array item that is part of a zipping unnest.
* In case there is only one zipping unnested array this restriction is not needed
* as the UNNEST node has to handle a single array and it's safe to do the filtering
* in the scanner.
*/
private void verifyZippingUnnestSlots() throws AnalysisException {
if (analyzer_.getNumZippingUnnests() <= 1) return;
List<TupleId> zippingUnnestTupleIds = Lists.newArrayList(
analyzer_.getZippingUnnestTupleIds());
List<SlotRef> slotRefsInWhereClause = new ArrayList<>();
whereClause_.collect(SlotRef.class, slotRefsInWhereClause);
for (SlotRef slotRef : slotRefsInWhereClause) {
if (slotRef.isBoundByTupleIds(zippingUnnestTupleIds)) {
throw new AnalysisException("Not allowed to add a filter on an unnested " +
"array under the same select statement: " + slotRef.toSql());
}
}
}
/**
* When zipping unnest is performed using the SQL standard compliant syntax (where
* the unnest is in the FROM clause) the SlotRefs used for the zipping unnest are not
* UnnestExprs as with the other approach (where zipping unnest is in the select list)
* but regular SlotRefs. As a result they have to be marked so that later on anything
* specific for zipping unnest could be executed for them.
* This function identifies the SlotRefs that are for zipping unnesting and sets a
* flag for them. Note, only marks the SlotRefs that are originated form a view.
*/
private void setZippingUnnestSlotRefsFromViews() {
for (TableRef tblRef : fromClause_.getTableRefs()) {
if (!tblRef.isFromClauseZippingUnnest()) continue;
Preconditions.checkState(tblRef instanceof CollectionTableRef);
ExprSubstitutionMap exprSubMap = getBaseTableSMapFromTableRef(tblRef);
if (exprSubMap == null) continue;
for (SelectListItem item : selectList_.getItems()) {
if (item.isStar()) continue;
Expr itemExpr = item.getExpr();
List<SlotRef> slotRefs = new ArrayList<>();
itemExpr.collect(SlotRef.class, slotRefs);
for (SlotRef slotRef : slotRefs) {
Expr subbedExpr = exprSubMap.get(slotRef);
if (subbedExpr == null || !(subbedExpr instanceof SlotRef)) continue;
SlotRef subbedSlotRef = (SlotRef)subbedExpr;
CollectionTableRef collTblRef = (CollectionTableRef)tblRef;
SlotRef collectionSlotRef = (SlotRef)collTblRef.getCollectionExpr();
// Check if 'slotRef' is originated from 'collectionSlotRef'.
if (subbedSlotRef.getDesc().getParent().getId() ==
collectionSlotRef.getDesc().getItemTupleDesc().getId()) {
slotRef.setIsZippingUnnest(true);
}
}
}
}
}
/**
* If 'tblRef' is originated from a view then returns the baseTblSmap from the view.
* Returns false otherwise.
*/
private ExprSubstitutionMap getBaseTableSMapFromTableRef(TableRef tblRef) {
if (tblRef.getResolvedPath().getRootDesc() == null) return null;
if (tblRef.getResolvedPath().getRootDesc().getSourceView() == null) return null;
return tblRef.getResolvedPath().getRootDesc().getSourceView().getBaseTblSmap();
}
/**
* Generates and registers !empty() predicates to filter out empty
* collections directly in the parent scan of collection table refs. This is
* a performance optimization to avoid the expensive processing of empty
* collections inside a subplan that would yield an empty result set.
*
* For correctness purposes, the predicates are generated in cases where we
* can ensure that they will be assigned only to the parent scan, and no other
* plan node.
*
* The conditions are as follows:
* - collection table ref is relative and non-correlated
* - collection table ref represents the rhs of an inner/cross/semi join
* - collection table ref's parent tuple is not outer joined
*
* Example: table T has field A which is of type array<array<int>>.
* 1) ... T join T.A a join a.item a_nest ... :
* all nodes on the path T -> a -> a_nest
* are required so are checked for !empty.
* 2) ... T left outer join T.A a join a.item a_nest ... : no !empty.
* 3) ... T join T.A a left outer join a.item a_nest ... :
* a checked for !empty.
* 4) ... T left outer join T.A a left outer join a.item a_nest ... :
* no !empty.
*
*
* TODO: In some cases, it is possible to generate !empty() predicates for
* a correlated table ref, but in general, that is not correct for non-trivial
* query blocks. For example, if the block with the correlated ref has an
* aggregation then adding a !empty() predicate would incorrectly discard rows
* from the final result set.
*
* TODO: Evaluating !empty() predicates at non-scan nodes interacts poorly with
* our BE projection of collection slots. For example, rows could incorrectly
* be filtered if a !empty() predicate is assigned to a plan node that comes
* after the unnest of the collection that also performs the projection.
*/
private void registerIsNotEmptyPredicates() throws AnalysisException {
for (TableRef tblRef: fromClause_.getTableRefs()) {
Preconditions.checkState(tblRef.isResolved());
if (!(tblRef instanceof CollectionTableRef)) continue;
CollectionTableRef ref = (CollectionTableRef) tblRef;
// Skip non-relative and correlated refs.
if (!ref.isRelative() || ref.isCorrelated()) continue;
// Skip outer and anti joins.
if (ref.getJoinOp().isOuterJoin() || ref.getJoinOp().isAntiJoin()) continue;
// Do not generate a predicate if the parent tuple is outer joined.
if (analyzer_.isOuterJoined(ref.getResolvedPath().getRootDesc().getId()))
continue;
// Don't push down the "is not empty" predicate for zipping unnests if there are
// multiple zipping unnests in the FROM clause.
if (tblRef.isZippingUnnest() && analyzer_.getNumZippingUnnests() > 1) {
continue;
}
IsNotEmptyPredicate isNotEmptyPred =
new IsNotEmptyPredicate(ref.getCollectionExpr().clone());
isNotEmptyPred.analyze(analyzer_);
// Register the predicate as an On-clause conjunct because it should only
// affect the result of this join and not the whole FROM clause.
analyzer_.registerOnClauseConjuncts(
Lists.<Expr>newArrayList(isNotEmptyPred), ref);
}
}
/**
* Populates baseTblSmap_ with our combined inline view smap and creates
* baseTblResultExprs.
*/
private void resolveInlineViewRefs()
throws AnalysisException {
// Gather the inline view substitution maps from the enclosed inline views
for (TableRef tblRef: fromClause_) {
if (tblRef instanceof InlineViewRef) {
InlineViewRef inlineViewRef = (InlineViewRef) tblRef;
baseTblSmap_ =
ExprSubstitutionMap.combine(baseTblSmap_, inlineViewRef.getBaseTblSmap());
}
}
baseTblResultExprs_ =
Expr.trySubstituteList(resultExprs_, baseTblSmap_, analyzer_, false);
if (LOG.isTraceEnabled()) {
LOG.trace("baseTblSmap_: " + baseTblSmap_.debugString());
LOG.trace("resultExprs: " + Expr.debugString(resultExprs_));
LOG.trace("baseTblResultExprs: " + Expr.debugString(baseTblResultExprs_));
}
}
/**
* Resolves the given raw path as a STAR path and checks its legality.
* Returns the resolved legal path, or throws if the raw path could not
* be resolved or is an illegal star path.
*/
private Path analyzeStarPath(List<String> rawPath, Analyzer analyzer)
throws AnalysisException {
Path resolvedPath = null;
try {
resolvedPath = analyzer.resolvePathWithMasking(rawPath, PathType.STAR);
} catch (TableLoadingException e) {
// Should never happen because we only check registered table aliases.
Preconditions.checkState(false);
}
Preconditions.checkNotNull(resolvedPath);
return resolvedPath;
}
/**
* Expand "*" select list item, ignoring semi-joined tables because those are
* currently illegal in any select list (even for inline views, etc.). Also ignores
* complex-typed fields for backwards compatibility unless EXPAND_COMPLEX_TYPES is set
* to true.
*/
private void expandStar(SelectListItem selectListItem) throws AnalysisException {
if (fromClause_.isEmpty()) {
throw new AnalysisException(
"'*' expression in select list requires FROM clause.");
}
// expand in From clause order
for (TableRef tableRef: fromClause_) {
if (tableRef.isHidden()) continue;
if (analyzer_.isSemiJoined(tableRef.getId())) continue;
Path resolvedPath = new Path(tableRef.getDesc(),
Collections.<String>emptyList());
Preconditions.checkState(resolvedPath.resolve());
expandStar(selectListItem, resolvedPath);
}
}
/**
* Expand "path.*" from a resolved path, ignoring complex-typed fields for backwards
* compatibility unless EXPAND_COMPLEX_TYPES is set to true.
*/
private void expandStar(SelectListItem selectListItem, Path resolvedPath)
throws AnalysisException {
Preconditions.checkState(resolvedPath.isResolved());
if (resolvedPath.destTupleDesc() != null &&
resolvedPath.destTupleDesc().getTable() != null &&
resolvedPath.destTupleDesc().getPath().getMatchedTypes().isEmpty()) {
// The resolved path targets a registered tuple descriptor of a catalog
// table. Expand the '*' based on the Hive-column order.
TupleDescriptor tupleDesc = resolvedPath.destTupleDesc();
FeTable table = tupleDesc.getTable();
for (Column c: table.getColumnsInHiveOrder()) {
// Omit auto-incrementing column for Kudu table since it's a hidden column.
if (c instanceof KuduColumn && ((KuduColumn)c).isAutoIncrementing()) continue;
addStarExpandedPath(selectListItem, resolvedPath, c.getName());
}
} else {
// The resolved path does not target the descriptor of a catalog table.
// Expand '*' based on the destination type of the resolved path.
Preconditions.checkState(resolvedPath.destType().isStructType());
StructType structType = (StructType) resolvedPath.destType();
Preconditions.checkNotNull(structType);
// Star expansion for references to nested collections.
// Collection Type Star Expansion
// array<int> --> item
// array<struct<f1,f2,...,fn>> --> f1, f2, ..., fn
// map<int,int> --> key, value
// map<int,struct<f1,f2,...,fn>> --> key, f1, f2, ..., fn
if (structType instanceof CollectionStructType) {
CollectionStructType cst = (CollectionStructType) structType;
if (cst.isMapStruct()) {
addStarExpandedPath(selectListItem, resolvedPath, Path.MAP_KEY_FIELD_NAME);
}
if (cst.getOptionalField().getType().isStructType()) {
structType = (StructType) cst.getOptionalField().getType();
for (StructField f: structType.getFields()) {
addStarExpandedPath(selectListItem, resolvedPath,
cst.getOptionalField().getName(), f.getName());
}
} else if (cst.isMapStruct()) {
addStarExpandedPath(selectListItem, resolvedPath, Path.MAP_VALUE_FIELD_NAME);
} else {
addStarExpandedPath(selectListItem, resolvedPath, Path.ARRAY_ITEM_FIELD_NAME);
}
} else {
// Default star expansion.
for (StructField f: structType.getFields()) {
if (f.isHidden()) continue;
addStarExpandedPath(selectListItem, resolvedPath, f.getName());
}
}
}
}
/**
* Expand star items to paths and store them in 'starExpandedPaths_'.
*/
private void collectStarExpandedPaths() throws AnalysisException {
for (SelectListItem item : selectList_.getItems()) {
if (item.isStar()) {
if (item.getRawPath() != null) {
Path resolvedPath = analyzeStarPath(item.getRawPath(), analyzer_);
expandStar(item, resolvedPath);
} else {
expandStar(item);
}
}
}
}
/**
* Helper function used during star expansion to add a single expanded path based on a
* given raw path to be resolved relative to an existing path.
*/
private void addStarExpandedPath(SelectListItem selectListItem, Path resolvedRootPath,
String... relRawPath) throws AnalysisException {
Path starExpandedPath = Path.createRelPath(resolvedRootPath, relRawPath);
Preconditions.checkState(starExpandedPath.resolve());
if (starExpandedPath.destType().isComplexType() &&
!starExpandedPath.comesFromIcebergMetadataTable() &&
!analyzer_.getQueryCtx().client_request.query_options.expand_complex_types) {
return;
}
if (!starExpandedPaths_.containsKey(selectListItem)) {
starExpandedPaths_.put(selectListItem, new ArrayList<>());
}
List<StarExpandedPathInfo> pathsOfStarItem = starExpandedPaths_.get(selectListItem);
pathsOfStarItem.add(new StarExpandedPathInfo(starExpandedPath, resolvedRootPath));
}
private void addStarExpandedPathResultExpr(StarExpandedPathInfo starExpandedPathInfo)
throws AnalysisException {
Preconditions.checkState(starExpandedPathInfo.getExpandedPath().isResolved());
SlotDescriptor slotDesc = analyzer_.registerSlotRef(
starExpandedPathInfo.getExpandedPath(), false);
SlotRef slotRef = new SlotRef(slotDesc);
if (slotRef.getType().isStructType()) {
slotRef.checkForUnsupportedStructFeatures();
}
Preconditions.checkState(slotRef.isAnalyzed(),
"Analysis should be done in constructor");
if (starExpandedPathInfo.shouldRegisterForColumnMasking()) {
analyzer_.registerColumnForMasking(slotDesc);
}
resultExprs_.add(slotRef);
final List<String> starExpandedRawPath = starExpandedPathInfo
.getExpandedPath().getRawPath();
colLabels_.add(starExpandedRawPath.get(starExpandedRawPath.size() - 1));
}
/**
* Registers view column privileges only when the view a catalog view.
*/
private void registerViewColumnPrivileges() {
for (TableRef tableRef: fromClause_.getTableRefs()) {
if (!(tableRef instanceof InlineViewRef)) continue;
InlineViewRef inlineViewRef = (InlineViewRef) tableRef;
FeView view = inlineViewRef.getView();
// For, local views (CTE), the view definition is explicitly defined in the query,
// this requires registering base column privileges instead of view column
// privileges. For example:
//
// with v as (select id as foo from functional.alltypes) select foo from v
//
// The query will register "functional.alltypes.id" column instead of
// "v.foo" column. This behavior is similar to inline views, e.g.
//
// select foo from (select id as foo from functional.alltypes) v
//
// The query will register "functional.alltypes.id" column instead of "v.foo"
// column.
//
// Contrasting this with catalog views.
//
// create view v(foo) as select id from functional.alltypes
// select v.foo from v
//
// The "select v.foo from v" query requires "v.foo" view column privilege because
// the "v" view definition is not exposed in the query. This behavior is also
// consistent with view access, such that the query requires a "select" privilege
// on "v" view and not "functional.alltypes" table.
boolean isCatalogView = view != null && !view.isLocalView();
if (!isCatalogView) continue;
for (Expr expr: getResultExprs()) {
List<Expr> slotRefs = new ArrayList<>();
expr.collectAll(Predicates.instanceOf(SlotRef.class), slotRefs);
for (Expr e: slotRefs) {
SlotRef slotRef = (SlotRef) e;
analyzer_.registerPrivReq(builder -> builder
.allOf(Privilege.SELECT)
.onColumn(view.getDb().getName(), view.getName(),
slotRef.getDesc().getLabel(), view.getOwnerUser())
.build());
}
}
}
}
/**
* Analyze the HAVING clause. The HAVING clause is a predicate, not a list
* (like GROUP BY or ORDER BY) and so cannot contain ordinals.
*
* At present, Impala's alias substitution logic only works for top-level
* expressions in a list. Since HAVING is a predicate, alias substitution
* does not work (the top-level predicate in HAVING is a Boolean expression.)
*
* TODO: Modify alias substitution to work like column resolution so that aliases
* can appear anywhere in the expression. Then, enable alias substitution in HAVING.
*/
private void analyzeHavingClause() throws AnalysisException {
// Analyze the HAVING clause first so we can check if it contains aggregates.
// We need to analyze/register it even if we are not computing aggregates.
if (havingClause_ == null) return;
List<Expr> subqueries = new ArrayList<>();
havingClause_.collectAll(Predicates.instanceOf(Subquery.class), subqueries);