-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
general_funcs.go
851 lines (801 loc) · 31.7 KB
/
general_funcs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package xform
import (
"container/list"
"math"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
"github.com/cockroachdb/cockroach/pkg/sql/opt/idxconstraint"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
"github.com/cockroachdb/cockroach/pkg/sql/opt/ordering"
"github.com/cockroachdb/cockroach/pkg/sql/opt/partialidx"
"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/errors"
)
// CustomFuncs contains all the custom match and replace functions used by the
// exploration rules. The unnamed norm.CustomFuncs allows CustomFuncs to provide
// a clean interface for calling functions from both the xform and norm packages
// using the same struct.
type CustomFuncs struct {
	norm.CustomFuncs

	// e is the explorer that owns this CustomFuncs; it provides access to the
	// memo, factory, and evaluation context used by the functions below.
	e *explorer

	// im is used to prove whether query filters imply partial index
	// predicates.
	im partialidx.Implicator
}
// Init initializes a new CustomFuncs with the given explorer.
func (c *CustomFuncs) Init(e *explorer) {
	// This initialization pattern ensures that fields are not unwittingly
	// reused. Field reuse must be explicit.
	*c = CustomFuncs{
		e: e,
	}
	// Initialize the embedded norm.CustomFuncs with the explorer's factory.
	c.CustomFuncs.Init(e.f)
	// Initialize the partial index implicator with the factory, metadata, and
	// evaluation context.
	c.im.Init(e.f, e.mem.Metadata(), e.evalCtx)
}
// IsCanonicalScan returns true if the given ScanPrivate is an original
// unaltered primary index Scan operator (i.e. unconstrained and not limited).
func (c *CustomFuncs) IsCanonicalScan(scan *memo.ScanPrivate) bool {
	// Delegate entirely to the ScanPrivate's own definition of canonical.
	return scan.IsCanonical()
}
// HasInvertedIndexes returns true if at least one inverted index is defined on
// the Scan operator's table.
func (c *CustomFuncs) HasInvertedIndexes(scanPrivate *memo.ScanPrivate) bool {
	tab := c.e.mem.Metadata().Table(scanPrivate.Table)
	// Start at ordinal 1: the primary index can never be inverted, so it is
	// skipped.
	for ord := 1; ord < tab.IndexCount(); ord++ {
		if tab.Index(ord).IsInverted() {
			return true
		}
	}
	return false
}
// RemapScanColsInFilter returns a new FiltersExpr where columns in src's table
// are replaced with columns of the same ordinal in dst's table. src and dst
// must scan the same base table.
func (c *CustomFuncs) RemapScanColsInFilter(
	filters memo.FiltersExpr, src, dst *memo.ScanPrivate,
) memo.FiltersExpr {
	// Delegate to the generic scalar remapping helper and unwrap the result
	// back into a FiltersExpr.
	remapped := c.remapScanColsInScalarExpr(&filters, src, dst)
	return *remapped.(*memo.FiltersExpr)
}
// remapScanColsInScalarExpr replaces columns in scalar that belong to src's
// table with the columns of the same ordinal in dst's table. src and dst must
// scan the same base table and project the same number of columns.
func (c *CustomFuncs) remapScanColsInScalarExpr(
	scalar opt.ScalarExpr, src, dst *memo.ScanPrivate,
) opt.ScalarExpr {
	md := c.e.mem.Metadata()
	if md.Table(src.Table).ID() != md.Table(dst.Table).ID() {
		panic(errors.AssertionFailedf("scans must have the same base table"))
	}
	if src.Cols.Len() != dst.Cols.Len() {
		panic(errors.AssertionFailedf("scans must have the same number of columns"))
	}
	// Build a mapping from each column of src to the column of dst at the same
	// base-table ordinal, then apply it to the scalar expression.
	var colMap opt.ColMap
	for col, ok := src.Cols.Next(0); ok; col, ok = src.Cols.Next(col + 1) {
		ordinal := src.Table.ColumnOrdinal(col)
		colMap.Set(int(col), int(dst.Table.ColumnID(ordinal)))
	}
	return c.e.f.RemapCols(scalar, colMap)
}
// remapJoinColsInFilter returns a new FiltersExpr where columns in leftSrc's
// table are replaced with columns of the same ordinal in leftDst's table and
// rightSrc's table are replaced with columns of the same ordinal in rightDst's
// table. leftSrc and leftDst must scan the same base table. rightSrc and
// rightDst must scan the same base table.
func (c *CustomFuncs) remapJoinColsInFilter(
	filters memo.FiltersExpr, leftSrc, leftDst, rightSrc, rightDst *memo.ScanPrivate,
) memo.FiltersExpr {
	// Delegate to the scalar-expression remapping helper and unwrap the result
	// back into a FiltersExpr.
	newFilters := c.remapJoinColsInScalarExpr(&filters, leftSrc, leftDst, rightSrc, rightDst).(*memo.FiltersExpr)
	return *newFilters
}
// remapJoinColsInScalarExpr remaps ColumnIDs in a scalar expression involving
// columns from a join of two base table scans. Columns from leftSrc's table
// are replaced with columns of the same ordinal in leftDst's table, and
// columns from rightSrc's table are replaced with columns of the same ordinal
// in rightDst's table. leftSrc and leftDst must scan the same base table, as
// must rightSrc and rightDst.
func (c *CustomFuncs) remapJoinColsInScalarExpr(
	scalar opt.ScalarExpr, leftSrc, leftDst, rightSrc, rightDst *memo.ScanPrivate,
) opt.ScalarExpr {
	md := c.e.mem.Metadata()
	if md.Table(leftSrc.Table).ID() != md.Table(leftDst.Table).ID() {
		panic(errors.AssertionFailedf("left scans must have the same base table"))
	}
	if md.Table(rightSrc.Table).ID() != md.Table(rightDst.Table).ID() {
		panic(errors.AssertionFailedf("right scans must have the same base table"))
	}
	if leftSrc.Cols.Len() != leftDst.Cols.Len() {
		panic(errors.AssertionFailedf("left scans must have the same number of columns"))
	}
	if rightSrc.Cols.Len() != rightDst.Cols.Len() {
		// Message fixed: previously read "rightscans", inconsistent with the
		// sibling assertion messages above.
		panic(errors.AssertionFailedf("right scans must have the same number of columns"))
	}
	// Remap each column in leftSrc to a column in leftDst, and each column in
	// rightSrc to a column in rightDst, matched by base-table ordinal.
	var colMap opt.ColMap
	for srcCol, ok := leftSrc.Cols.Next(0); ok; srcCol, ok = leftSrc.Cols.Next(srcCol + 1) {
		ord := leftSrc.Table.ColumnOrdinal(srcCol)
		dstCol := leftDst.Table.ColumnID(ord)
		colMap.Set(int(srcCol), int(dstCol))
	}
	for srcCol, ok := rightSrc.Cols.Next(0); ok; srcCol, ok = rightSrc.Cols.Next(srcCol + 1) {
		ord := rightSrc.Table.ColumnOrdinal(srcCol)
		dstCol := rightDst.Table.ColumnID(ord)
		colMap.Set(int(srcCol), int(dstCol))
	}
	return c.e.f.RemapCols(scalar, colMap)
}
// checkConstraintFilters generates all filters that we can derive from the
// check constraints. These are constraints that have been validated and are
// non-nullable. We only use non-nullable check constraints because they
// behave differently from filters on NULL. Check constraints are satisfied
// when their expression evaluates to NULL, while filters are not.
//
// For example, the check constraint a > 1 is satisfied if a is NULL but the
// equivalent filter a > 1 is not.
//
// These filters do not really filter any rows, they are rather facts or
// guarantees about the data but treating them as filters may allow some
// indexes to be constrained and used. Consider the following example:
//
// CREATE TABLE abc (
//
//	a INT PRIMARY KEY,
//	b INT NOT NULL,
//	c STRING NOT NULL,
//	CHECK (a < 10 AND a > 1),
//	CHECK (b < 10 AND b > 1),
//	CHECK (c in ('first', 'second')),
//	INDEX secondary (b, a),
//	INDEX tertiary (c, b, a))
//
// Now consider the query: SELECT a, b WHERE a > 5
//
// Notice that the filter provided previously wouldn't let the optimizer use
// the secondary or tertiary indexes. However, given that we can use the
// constraints on a, b and c, we can actually use the secondary and tertiary
// indexes. In fact, for the above query we can do the following:
//
// select
//
//	├── columns: a:1(int!null) b:2(int!null)
//	├── scan abc@tertiary
//	│    ├── columns: a:1(int!null) b:2(int!null)
//	│    └── constraint: /3/2/1: [/'first'/2/6 - /'first'/9/9] [/'second'/2/6 - /'second'/9/9]
//	└── filters
//	     └── gt [type=bool]
//	          ├── variable: a [type=int]
//	          └── const: 5 [type=int]
//
// Similarly, the secondary index could also be used. All such index scans
// will be added to the memo group.
func (c *CustomFuncs) checkConstraintFilters(tabID opt.TableID) memo.FiltersExpr {
	tabMeta := c.e.mem.Metadata().TableMeta(tabID)
	constraints := tabMeta.Constraints
	if constraints == nil {
		// No check constraints exist on this table.
		return memo.FiltersExpr{}
	}
	filters := *constraints.(*memo.FiltersExpr)
	// Clamp the slice capacity to its length so that a caller's append
	// allocates a fresh backing array instead of clobbering shared state.
	return filters[:len(filters):len(filters)]
}
// initIdxConstraintForIndex allocates and initializes an idxconstraint.Instance
// that constrains the given index of the given table using the required and
// optional filters.
func (c *CustomFuncs) initIdxConstraintForIndex(
	requiredFilters, optionalFilters memo.FiltersExpr, tabID opt.TableID, indexOrd int,
) (ic *idxconstraint.Instance) {
	ic = &idxconstraint.Instance{}
	// Fill out data structures needed to initialize the idxconstraint library.
	// Use LaxKeyColumnCount, since all columns <= LaxKeyColumnCount are
	// guaranteed to be part of each row's key (i.e. not stored in row's value,
	// which does not take part in an index scan). Note that the OrderingColumn
	// slice cannot be reused, as Instance.Init can use it in the constraint.
	md := c.e.mem.Metadata()
	tabMeta := md.TableMeta(tabID)
	index := tabMeta.Table.Index(indexOrd)
	ps := tabMeta.IndexPartitionLocality(index.Ordinal())
	columns := make([]opt.OrderingColumn, index.LaxKeyColumnCount())
	var notNullCols opt.ColSet
	// Collect the index's key columns and track which of them are known to be
	// non-nullable.
	for i := range columns {
		col := index.Column(i)
		ordinal := col.Ordinal()
		nullable := col.IsNullable()
		colID := tabID.ColumnID(ordinal)
		columns[i] = opt.MakeOrderingColumn(colID, col.Descending)
		if !nullable {
			notNullCols.Add(colID)
		}
	}
	// Generate index constraints.
	ic.Init(
		requiredFilters, optionalFilters,
		columns, notNullCols, tabMeta.ComputedCols,
		true /* consolidate */, c.e.evalCtx, c.e.f, ps,
	)
	return ic
}
// computedColFilters generates all filters that can be derived from the list of
// computed column expressions from the given table. A computed column can be
// used as a filter when it has a constant value. That is true when:
//
//  1. All other columns it references are constant, because other filters in
//     the query constrain them to be so.
//  2. All functions in the computed column expression can be folded into
//     constants (i.e. they do not have problematic side effects).
//
// Note that computed columns can depend on other computed columns; in general
// the dependencies form an acyclic directed graph. computedColFilters will
// return filters for all constant computed columns, regardless of the order of
// their dependencies.
//
// As with checkConstraintFilters, computedColFilters do not really filter any
// rows, they are rather facts or guarantees about the data. Treating them as
// filters may allow some indexes to be constrained and used. Consider the
// following example:
//
//	CREATE TABLE t (
//	  k INT NOT NULL,
//	  hash INT AS (k % 4) STORED,
//	  PRIMARY KEY (hash, k)
//	)
//
//	SELECT * FROM t WHERE k = 5
//
// Notice that the filter provided explicitly wouldn't allow the optimizer to
// seek using the primary index (it would have to fall back to a table scan).
// However, column "hash" can be proven to have the constant value of 1, since
// it's dependent on column "k", which has the constant value of 5. This enables
// usage of the primary index:
//
//	scan t
//	├── columns: k:1(int!null) hash:2(int!null)
//	├── constraint: /2/1: [/1/5 - /1/5]
//	├── key: (2)
//	└── fd: ()-->(1)
//
// The values of both columns in that index are known, enabling a single value
// constraint to be generated.
func (c *CustomFuncs) computedColFilters(
	scanPrivate *memo.ScanPrivate, requiredFilters, optionalFilters memo.FiltersExpr,
) memo.FiltersExpr {
	tabMeta := c.e.mem.Metadata().TableMeta(scanPrivate.Table)
	if len(tabMeta.ComputedCols) == 0 {
		// Fast path: the table has no computed columns.
		return nil
	}
	// Start with set of constant columns, as derived from the list of filter
	// conditions.
	constCols := make(constColsMap)
	c.findConstantFilterCols(constCols, scanPrivate, requiredFilters)
	c.findConstantFilterCols(constCols, scanPrivate, optionalFilters)
	if len(constCols) == 0 {
		// No constant values could be derived from filters, so assume that there
		// are also no constant computed columns.
		return nil
	}
	// Construct a new filter condition for each computed column that is
	// constant (i.e. all of its variables are in the constCols set).
	var computedColFilters memo.FiltersExpr
	for colID := range tabMeta.ComputedCols {
		if c.tryFoldComputedCol(tabMeta, colID, constCols) {
			constVal := constCols[colID]
			// Note: Eq is not correct here because of NULLs.
			eqOp := c.e.f.ConstructIs(c.e.f.ConstructVariable(colID), constVal)
			computedColFilters = append(computedColFilters, c.e.f.ConstructFiltersItem(eqOp))
		}
	}
	return computedColFilters
}
// constColsMap maps columns to constant values that we can infer from query
// filters.
//
// Note that for composite types, the constant value is not interchangeable with
// the column in all contexts. Composite types are types which can have
// logically equal but not identical values, like the decimals 1.0 and 1.00.
//
// For example:
//
//	CREATE TABLE t (
//	  d DECIMAL,
//	  c DECIMAL AS (d*10) STORED
//	);
//	INSERT INTO t VALUES (1.0), (1.00), (1.000);
//	SELECT c::STRING FROM t WHERE d=1;
//	----
//	10.0
//	10.00
//	10.000
//
// We can infer that c has a constant value of 1 but we can't replace it with 1
// in any expression.
type constColsMap map[opt.ColumnID]opt.ScalarExpr
// findConstantFilterCols adds to constFilterCols mappings from table column ID
// to the constant value of that column. It does this by iterating over the
// given lists of filters and finding expressions that constrain columns to a
// single constant value. For example:
//
//	x = 5 AND y = 'foo'
//
// This would add a mapping from x => 5 and y => 'foo', which constants can
// then be used to prove that dependent computed columns are also constant.
func (c *CustomFuncs) findConstantFilterCols(
	constFilterCols constColsMap, scanPrivate *memo.ScanPrivate, filters memo.FiltersExpr,
) {
	tab := c.e.mem.Metadata().Table(scanPrivate.Table)
	for i := range filters {
		// If filter constraints are not tight, then no way to derive constant
		// values.
		props := filters[i].ScalarProps()
		if !props.TightConstraints {
			continue
		}
		// Iterate over constraint conjuncts with a single column and single
		// span having a single key. The inner index is named j so that it does
		// not shadow the outer loop index i (previously both were named i).
		for j, n := 0, props.Constraints.Length(); j < n; j++ {
			cons := props.Constraints.Constraint(j)
			if cons.Columns.Count() != 1 || cons.Spans.Count() != 1 {
				continue
			}
			// Skip columns that aren't in the scanned table.
			colID := cons.Columns.Get(0).ID()
			if !scanPrivate.Cols.Contains(colID) {
				continue
			}
			colTyp := tab.Column(scanPrivate.Table.ColumnOrdinal(colID)).DatumType()
			span := cons.Spans.Get(0)
			if !span.HasSingleKey(c.e.evalCtx) {
				continue
			}
			// The span pins the column to exactly one value; record it.
			datum := span.StartKey().Value(0)
			constFilterCols[colID] = c.e.f.ConstructConstVal(datum, colTyp)
		}
	}
}
// splitScanIntoUnionScans tries to find a UnionAll of Scan operators (with an
// optional hard limit) that each scan over a single key from the original
// Scan's constraints. The UnionAll is returned if the scans can provide the
// given ordering, and if the statistics suggest that splitting the scan could
// be beneficial. If no such UnionAll of Scans can be found, ok=false is
// returned. This is beneficial in cases where an ordering is required on a
// suffix of the index columns, and constraining the first column(s) allows the
// scan to provide that ordering.
func (c *CustomFuncs) splitScanIntoUnionScans(
	ordering props.OrderingChoice,
	scan memo.RelExpr,
	sp *memo.ScanPrivate,
	cons *constraint.Constraint,
	limit int,
	keyPrefixLength int,
) (_ memo.RelExpr, ok bool) {
	// Thin wrapper: the nil filters argument means no Select wrapping is
	// needed, so only plain Scans are produced.
	return c.splitScanIntoUnionScansOrSelects(ordering, scan, sp, cons, limit, keyPrefixLength, nil /* filters */)
}
// splitScanIntoUnionScansOrSelects tries to find a UnionAll of Scan operators,
// (with an optional hard limit), or UnionAll of Selects from Scans that each
// scan over a single key from the original Scan's constraints. The UnionAll is
// returned if the scans/selects can provide the given ordering, and if the
// statistics suggest that splitting them could be beneficial. If no such
// UnionAll of Scans/Selects can be found, ok=false is returned. This is
// beneficial in cases where an ordering is required on a suffix of the index
// columns, and constraining the first column(s) allows the scan to provide that
// ordering.
// TODO(drewk): handle inverted scans.
func (c *CustomFuncs) splitScanIntoUnionScansOrSelects(
	ordering props.OrderingChoice,
	scan memo.RelExpr,
	sp *memo.ScanPrivate,
	cons *constraint.Constraint,
	limit int,
	keyPrefixLength int,
	filters memo.FiltersExpr,
) (_ memo.RelExpr, ok bool) {
	// Hash index bucket count may be higher than 16, especially in large table
	// cases. Pick a default maxScanCount sufficiently high to avoid disabling
	// this optimization for such indexes with high bucket count, but not so high
	// that we might plan thousands of Scans. If IN list check constraints on the
	// first index column exist, use that to find the number for maxScanCount.
	// TODO(msirek): Is there a more efficient way to represent this plan when #
	// of scans is high, for example, introduce new operation types like
	// "SkipScanExpr", "SkipSelectExpr", "LooseScanExpr", "LooseSelectExpr" (see
	// https://wiki.postgresql.org/wiki/Loose_indexscan and
	// https://github.com/cockroachdb/cockroach/pull/38216), or maybe just set
	// new read modes in the pre-existing ScanExpr and SelectExpr?
	// This would allow us to represent order-preserving skip scans without the
	// need for large UNION ALL expressions.
	//
	// TODO(msirek): We may want to lift this max entirely at some point.
	// maxScanCount is the maximum number of scans to produce if there is no
	// non-null check constraint defined on the first index column.
	maxScanCount := 256
	// hardMaxScanCount is the upper limit on what we'll attempt to increase
	// maxScanCount to using numAllowedValues() based on check constraint values.
	// This is configurable using a session setting opt_split_scan_limit.
	// If OptSplitScanLimit is below maxScanCount, we will decrease maxScanCount
	// to that value because a hard limit should never be lower than a soft limit.
	hardMaxScanCount := int(c.e.evalCtx.SessionData().OptSplitScanLimit)
	// Hack: If OptSplitScanLimit is set to 0, it may be because it is
	// uninitialized for an internal SQL execution. Set it to the old maximum of
	// 16 to ensure there are no regressions.
	if hardMaxScanCount == 0 {
		// TODO(msirek): Remove this code once the following PR ships:
		// https://github.com/cockroachdb/cockroach/pull/73267
		hardMaxScanCount = 16
	}
	if hardMaxScanCount < maxScanCount {
		maxScanCount = hardMaxScanCount
	}
	if cons.Columns.Count() == 0 {
		// Without constrained columns there is nothing to split on.
		return
	}
	firstColID := cons.Columns.Get(0).ID()
	// The number of allowed values from the check constraints may surpass
	// maxScanCount. Find this number so we don't inadvertently disable this
	// optimization.
	if checkConstraintValues, ok := c.numAllowedValues(firstColID, sp.Table); ok {
		if maxScanCount < checkConstraintValues {
			if checkConstraintValues > hardMaxScanCount {
				maxScanCount = hardMaxScanCount
			} else {
				maxScanCount = checkConstraintValues
			}
		}
	}
	keyCtx := constraint.MakeKeyContext(&cons.Columns, c.e.evalCtx)
	spans := cons.Spans
	// Get the total number of keys that can be extracted from the spans. Also
	// keep track of the first span which would exceed the Scan budget if it was
	// used to construct new Scan operators.
	var keyCount int
	// budgetExceededIndex starts as a sentinel one past the last span; it is
	// lowered to the first span whose keys would overflow the budget.
	budgetExceededIndex := spans.Count()
	additionalScanBudget := maxScanCount
	for i := 0; i < spans.Count(); i++ {
		if cnt, ok := spans.Get(i).KeyCount(&keyCtx, keyPrefixLength); ok {
			keyCount += int(cnt)
			additionalScanBudget -= int(cnt)
			if additionalScanBudget < 0 {
				// Splitting any spans from this span on would lead to exceeding the max
				// Scan count. Keep track of the index of this span.
				budgetExceededIndex = i
				break
			}
		}
	}
	if keyCount <= 0 || (keyCount == 1 && spans.Count() == 1) || budgetExceededIndex == 0 {
		// Ensure that at least one new Scan will be constructed.
		return nil, false
	}
	scanCount := keyCount
	if scanCount > maxScanCount {
		// We will construct at most maxScanCount new Scans.
		scanCount = maxScanCount
	}
	rowCount := scan.Relational().Statistics().RowCount
	if limit > 0 {
		nLogN := rowCount * math.Log2(rowCount)
		if scan.Relational().Statistics().Available &&
			float64(scanCount*randIOCostFactor+limit*seqIOCostFactor) >= nLogN {
			// Splitting the Scan may not be worth the overhead. Creating a sequence of
			// Scans and Unions is expensive, so we only want to create the plan if it
			// is likely to be used. This rewrite is competing against a plan with a
			// sort, which is approximately O(N log N), so use that as a rough costing
			// threshold.
			return nil, false
		}
	}
	// The index ordering must have a prefix of columns of length keyLength
	// followed by the ordering columns either in order or in reverse order.
	hasOrderingSeq, reverse := indexHasOrderingSequence(
		c.e.mem.Metadata(), scan, sp, ordering, keyPrefixLength)
	if !hasOrderingSeq {
		return nil, false
	}
	newHardLimit := memo.MakeScanLimit(int64(limit), reverse)
	// With a filter we might have to scan all rows before reaching the LIMIT.
	if !filters.IsTrue() {
		newHardLimit = 0
	}
	// makeNewUnion builds a UnionAll tree node with 'left' and 'right' child
	// nodes, which may be scans, selects or previously-built UnionAllExprs. The
	// ColumnIDs of the original Scan are used by the resulting expression, but it
	// is up to the caller to set this via outCols.
	makeNewUnion := func(left, right memo.RelExpr, outCols opt.ColList) memo.RelExpr {
		return c.e.f.ConstructUnionAll(left, right, &memo.SetPrivate{
			LeftCols:  left.Relational().OutputCols.ToList(),
			RightCols: right.Relational().OutputCols.ToList(),
			OutCols:   outCols,
		})
	}
	// Attempt to extract single-key spans and use them to construct limited
	// Scans. Add these Scans to a UnionAll tree. Any remaining spans will be used
	// to construct a single unlimited Scan, which will also be added to the
	// UnionAll tree.
	var noLimitSpans constraint.Spans
	var last memo.RelExpr
	queue := list.New()
	for i, n := 0, spans.Count(); i < n; i++ {
		if i >= budgetExceededIndex {
			// The Scan budget has been reached; no additional Scans can be created.
			noLimitSpans.Append(spans.Get(i))
			continue
		}
		singleKeySpans, ok := spans.Get(i).Split(&keyCtx, keyPrefixLength)
		if !ok {
			// Single key spans could not be extracted from this span, so add it to
			// the set of spans that will be used to construct an unlimited Scan.
			noLimitSpans.Append(spans.Get(i))
			continue
		}
		for j, m := 0, singleKeySpans.Count(); j < m; j++ {
			// Construct a new Scan for each span.
			newScanPrivate := c.makeNewScanPrivate(
				sp,
				cons.Columns,
				newHardLimit,
				singleKeySpans.Get(j),
			)
			newScanOrSelect := c.e.f.ConstructScan(newScanPrivate)
			if !filters.IsTrue() {
				// Wrap the Scan in a Select (plus Limit if applicable) so the
				// filters still apply to each single-key branch.
				newScanOrSelect = c.wrapScanInLimitedSelect(
					newScanOrSelect,
					sp,
					newScanPrivate,
					filters,
					limit,
				)
			}
			queue.PushBack(newScanOrSelect)
		}
	}
	// Return early if the queue is empty. This is possible if the first
	// splittable span splits into a number of keys greater than maxScanCount.
	if queue.Len() == 0 {
		return nil, false
	}
	var outCols opt.ColList
	oddNumScans := (queue.Len() % 2) != 0
	// Make the UNION ALLs as a balanced tree. This performs better for large
	// numbers of spans than a left-deep tree because neighboring branches can
	// be processed in parallel and the deeper the tree, the more comparisons the
	// rows from the deepest nodes in the tree need to undergo as rows are merged
	// for each UNION ALL operation.
	//
	// This balanced tree building works by feeding RelExprs into a queue and
	// popping them from the front, combining each pair of nodes with a
	// UnionAllOp. Initially only Scan or Select operations are pushed to the
	// queue, so the first round of UnionAllOps, representing the deepest level of
	// the tree, contain only unions of scans. As UnionAllOps are built, they are
	// fed into the back of the queue, and when all scans have been popped from
	// the queue, the unions are processed. The unions themselves are then built
	// as children of new UnionAllOps. For every two UnionAllOps popped, we push
	// one new parent UnionAllOp onto the queue. Eventually we will end up with
	// only a single RelExpr in the queue, which is our root node.
	for queue.Len() > 1 {
		left := queue.Front()
		queue.Remove(left)
		right := queue.Front()
		queue.Remove(right)
		if oddNumScans &&
			(left.Value.(memo.RelExpr).Op() == opt.UnionAllOp ||
				right.Value.(memo.RelExpr).Op() == opt.UnionAllOp) {
			// Not necessary, but keep spans with lower values in the left subtree.
			left, right = right, left
		}
		// TODO(mgartner/msirek): Converting ColSets to ColLists here is only safe
		// because column IDs are always allocated in a consistent, ascending order
		// for each duplicated table in the metadata. If column ID allocation
		// changes, this could break.
		if noLimitSpans.Count() == 0 && queue.Len() == 0 {
			// This union is the root of the tree; it can use the original
			// Scan's column IDs directly.
			outCols = sp.Cols.ToList()
		} else {
			_, cols := c.DuplicateColumnIDs(sp.Table, sp.Cols)
			outCols = cols.ToList()
		}
		queue.PushBack(makeNewUnion(left.Value.(memo.RelExpr), right.Value.(memo.RelExpr), outCols))
	}
	lastElem := queue.Front()
	last = lastElem.Value.(memo.RelExpr)
	queue.Remove(lastElem)
	if noLimitSpans.Count() == spans.Count() {
		// Expect to generate at least one new limited single-key Scan. This could
		// happen if a valid key count could be obtained for at least span, but no
		// span could be split into single-key spans.
		return nil, false
	}
	if noLimitSpans.Count() == 0 {
		// All spans could be used to generate limited Scans.
		return last, true
	}
	// If any spans could not be used to generate limited Scans, use them to
	// construct an unlimited Scan and add it to the UnionAll tree.
	newScanPrivate := c.DuplicateScanPrivate(sp)
	// Map from cons Columns to new columns.
	newScanPrivate.SetConstraint(c.e.evalCtx, &constraint.Constraint{
		Columns: cons.Columns.RemapColumns(sp.Table, newScanPrivate.Table),
		Spans:   noLimitSpans,
	})
	newScanOrSelect := c.e.f.ConstructScan(newScanPrivate)
	if !filters.IsTrue() {
		newScanOrSelect = c.wrapScanInLimitedSelect(newScanOrSelect, sp, newScanPrivate, filters, limit)
	}
	// TODO(mgartner/msirek): Converting ColSets to ColLists here is only safe
	// because column IDs are always allocated in a consistent, ascending order
	// for each duplicated table in the metadata. If column ID allocation
	// changes, this could break.
	return makeNewUnion(last, newScanOrSelect, sp.Cols.ToList()), true
}
// numAllowedValues returns the number of allowed values for a column with a
// given columnID by checking if there is a CHECK constraint with no nulls
// defined on it and returning the maximum number of results that can be read
// from Spans on that individual column. If not found, (0, false) is returned.
func (c *CustomFuncs) numAllowedValues(
	columnID opt.ColumnID, tabID opt.TableID,
) (numValues int, ok bool) {
	tabMeta := c.e.f.Metadata().TableMeta(tabID)
	checks := tabMeta.Constraints
	if checks == nil || checks.Op() != opt.FiltersOp {
		// No usable check constraints are defined on the table.
		return 0, false
	}
	cols := opt.MakeColSet(columnID)
	filters := *checks.(*memo.FiltersExpr)
	// For each ANDed check constraint...
	for i := range filters {
		filter := filters[i]
		// This must be some type of comparison operation, or an OR or AND
		// expression. These operations have at least 2 children.
		if filter.Condition.ChildCount() < 2 {
			continue
		}
		conSet := filter.ScalarProps().Constraints
		if conSet == nil {
			continue
		}
		// Look for a constraint whose leading column is the target column and
		// which actually restricts it to a countable set of values. Note: the
		// local is named conSet/cn to avoid shadowing the constraint package.
		for j := 0; j < conSet.Length(); j++ {
			cn := conSet.Constraint(j)
			if cn.Columns.Get(0).ID() != columnID {
				continue
			}
			if cn.IsContradiction() || cn.IsUnconstrained() {
				continue
			}
			if distinctVals, ok := cn.CalculateMaxResults(c.e.evalCtx, cols, cols); ok {
				// Clamp to MaxInt32 to avoid overflow on conversion to int.
				if distinctVals > math.MaxInt32 {
					return math.MaxInt32, true
				}
				return int(distinctVals), true
			}
		}
	}
	return 0, false
}
// wrapScanInLimitedSelect wraps "scan" in a SelectExpr with filters mapped from
// the originalScanPrivate columns to the columns in scan. If limit is non-zero,
// the SelectExpr is wrapped in a LimitExpr with that limit.
func (c *CustomFuncs) wrapScanInLimitedSelect(
	scan memo.RelExpr,
	originalScanPrivate, newScanPrivate *memo.ScanPrivate,
	filters memo.FiltersExpr,
	limit int,
) (limitedSelect memo.RelExpr) {
	// Remap the filters onto the new scan's columns before constructing the
	// Select.
	remappedFilters := c.RemapScanColsInFilter(filters, originalScanPrivate, newScanPrivate)
	limitedSelect = c.e.f.ConstructSelect(scan, remappedFilters)
	if limit == 0 {
		// No limit requested; return the bare Select.
		return limitedSelect
	}
	return c.e.f.ConstructLimit(
		limitedSelect,
		c.IntConst(tree.NewDInt(tree.DInt(limit))),
		c.EmptyOrdering(),
	)
}
// indexHasOrderingSequence returns whether the Scan can provide a given
// ordering under the assumption that we are scanning a single-key span with the
// given keyLength (and if so, whether we need to scan it in reverse).
// For example:
//
//	index: +1/-2/+3,
//	limitOrdering: -2/+3,
//	keyLength: 1,
//	=>
//	hasSequence: True, reverse: False
//
//	index: +1/-2/+3,
//	limitOrdering: +2/-3,
//	keyLength: 1,
//	=>
//	hasSequence: True, reverse: True
//
//	index: +1/-2/+3/+4,
//	limitOrdering: +3/+4,
//	keyLength: 1,
//	=>
//	hasSequence: False, reverse: False
func indexHasOrderingSequence(
	md *opt.Metadata,
	scan memo.RelExpr,
	sp *memo.ScanPrivate,
	limitOrdering props.OrderingChoice,
	keyLength int,
) (hasSequence, reverse bool) {
	tableMeta := md.TableMeta(sp.Table)
	index := tableMeta.Table.Index(sp.Index)
	if keyLength > index.ColumnCount() {
		// The key contains more columns than the index. The limit ordering sequence
		// cannot be part of the index ordering.
		return false, false
	}
	// Create a copy of the Scan's FuncDepSet, and add the first 'keyCount'
	// columns from the index as constant columns. The columns are constant
	// because the span contains only a single key on those columns.
	var fds props.FuncDepSet
	fds.CopyFrom(&scan.Relational().FuncDeps)
	prefixCols := opt.ColSet{}
	for i := 0; i < keyLength; i++ {
		col := sp.Table.IndexColumnID(index, i)
		prefixCols.Add(col)
	}
	fds.AddConstants(prefixCols)
	// Use fds to simplify a copy of the limit ordering; the prefix columns will
	// become part of the optional ColSet.
	requiredOrdering := limitOrdering.Copy()
	requiredOrdering.Simplify(&fds)
	// If the ScanPrivate can satisfy requiredOrdering, it must return columns
	// ordered by a prefix of length keyLength, followed by the columns of
	// limitOrdering.
	return ordering.ScanPrivateCanProvide(md, sp, &requiredOrdering)
}
// makeNewScanPrivate returns a new ScanPrivate with a new TableID and the given
// limit and span. All ColumnIDs and references to those ColumnIDs are replaced
// with new ones from the new TableID. All other fields are simply copied from
// the old ScanPrivate.
func (c *CustomFuncs) makeNewScanPrivate(
	sp *memo.ScanPrivate,
	columns constraint.Columns,
	newHardLimit memo.ScanLimit,
	span *constraint.Span,
) *memo.ScanPrivate {
	newScanPrivate := c.DuplicateScanPrivate(sp)
	// DuplicateScanPrivate does not initialize the Constraint or HardLimit
	// fields, so we do that now.
	newScanPrivate.HardLimit = newHardLimit
	// Construct the new Constraint field with the given span and remapped
	// ordering columns.
	var newSpans constraint.Spans
	newSpans.InitSingleSpan(span)
	newConstraint := &constraint.Constraint{
		Columns: columns.RemapColumns(sp.Table, newScanPrivate.Table),
		Spans:   newSpans,
	}
	newScanPrivate.SetConstraint(c.e.evalCtx, newConstraint)
	return newScanPrivate
}
// getKnownScanConstraint returns a Constraint that is known to hold true for
// the output of the Scan operator with the given ScanPrivate. If the
// ScanPrivate has a Constraint, the scan Constraint is returned. Otherwise, an
// effort is made to retrieve a Constraint from the underlying table's check
// constraints. getKnownScanConstraint assumes that the scan is not inverted.
func (c *CustomFuncs) getKnownScanConstraint(
	sp *memo.ScanPrivate,
) (cons *constraint.Constraint, found bool) {
	if sp.Constraint != nil {
		// The ScanPrivate already carries an explicit constraint; use it.
		cons = sp.Constraint
		return cons, !cons.IsUnconstrained()
	}
	// Otherwise, derive a constraint from the underlying table's check
	// constraints, treating them as optional filters on the scanned index.
	checkFilters := c.checkConstraintFilters(sp.Table)
	instance := c.initIdxConstraintForIndex(
		nil, /* requiredFilters */
		checkFilters,
		sp.Table,
		sp.Index,
	)
	cons = instance.Constraint()
	return cons, !cons.IsUnconstrained()
}