-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Our current join hints (a INNER HASH JOIN b, a LEFT LOOKUP JOIN b, etc.) fix both the join order and the join algorithm. This commit adds the syntax and support for hinting the join order without hinting the join algorithm. This will be useful for the (few) cases where the optimizer processes the tables in a suboptimal order. Resolves: #115308 Release note (sql change): It is now possible to hint to the optimizer that it should plan a straight join by using the syntax `... INNER STRAIGHT JOIN ...`. If the hint is provided, the optimizer will now fix the join order as given in the query, even if it estimates that a different plan using join reordering would have a lower cost.
- Loading branch information
1 parent
0baf22a
commit 365ea77
Showing
13 changed files
with
431 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
# LogicTest: local | ||
|
||
statement ok | ||
CREATE TABLE t1 (x INT, PRIMARY KEY (x)) | ||
|
||
statement ok | ||
CREATE TABLE t2 (x INT, y INT, z INT, PRIMARY KEY (x), INDEX idx_y (y)) | ||
|
||
# Set up the statistics as if t1 is much smaller than t2. | ||
statement ok | ||
ALTER TABLE t1 INJECT STATISTICS '[ | ||
{ | ||
"columns": ["x"], | ||
"created_at": "2018-01-01 1:00:00.00000+00:00", | ||
"row_count": 100, | ||
"distinct_count": 100 | ||
} | ||
]' | ||
|
||
statement ok | ||
ALTER TABLE t2 INJECT STATISTICS '[ | ||
{ | ||
"columns": ["y", "z"], | ||
"created_at": "2018-01-01 1:00:00.00000+00:00", | ||
"row_count": 10000, | ||
"distinct_count": 10000 | ||
} | ||
]' | ||
|
||
# -------------------------------------------------- | ||
# INNER JOIN | ||
# -------------------------------------------------- | ||
|
||
# The best plan should be a lookup join into t2 (right). | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.y | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• lookup join (inner) | ||
│ columns: (x, x, y, z) | ||
│ estimated row count: 990 | ||
│ table: t2@t2_pkey | ||
│ equality: (x) = (x) | ||
│ equality cols are key | ||
│ | ||
└── • lookup join (inner) | ||
│ columns: (x, x, y) | ||
│ estimated row count: 990 | ||
│ table: t2@idx_y | ||
│ equality: (x) = (y) | ||
│ | ||
└── • scan | ||
columns: (x) | ||
estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
table: t1@t1_pkey | ||
spans: FULL SCAN | ||
|
||
# The STRAIGHT hint should not change the plan, as the table on the right side of the join is still t2. | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER STRAIGHT JOIN t2 ON t1.x = t2.y | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• lookup join (inner) | ||
│ columns: (x, x, y, z) | ||
│ estimated row count: 990 | ||
│ table: t2@t2_pkey | ||
│ equality: (x) = (x) | ||
│ equality cols are key | ||
│ | ||
└── • lookup join (inner) | ||
│ columns: (x, x, y) | ||
│ estimated row count: 990 | ||
│ table: t2@idx_y | ||
│ equality: (x) = (y) | ||
│ | ||
└── • scan | ||
columns: (x) | ||
estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
table: t1@t1_pkey | ||
spans: FULL SCAN | ||
|
||
# Now, the best plan (lookup join into t2) should no longer be picked, as t1 is now on the right; a hash join is planned instead. | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t2 INNER STRAIGHT JOIN t1 ON t1.x = t2.y | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• hash join (inner) | ||
│ columns: (x, y, z, x) | ||
│ estimated row count: 990 | ||
│ equality: (y) = (x) | ||
│ right cols are key | ||
│ | ||
├── • scan | ||
│ columns: (x, y, z) | ||
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago) | ||
│ table: t2@t2_pkey | ||
│ spans: FULL SCAN | ||
│ | ||
└── • scan | ||
columns: (x) | ||
estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
table: t1@t1_pkey | ||
spans: FULL SCAN | ||
|
||
|
||
# The best plan should be a hash join into t1 (smaller table). | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.z | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• hash join (inner) | ||
│ columns: (x, x, y, z) | ||
│ estimated row count: 990 | ||
│ equality: (z) = (x) | ||
│ right cols are key | ||
│ | ||
├── • scan | ||
│ columns: (x, y, z) | ||
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago) | ||
│ table: t2@t2_pkey | ||
│ spans: FULL SCAN | ||
│ | ||
└── • scan | ||
columns: (x) | ||
estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
table: t1@t1_pkey | ||
spans: FULL SCAN | ||
|
||
# The STRAIGHT hint should not change the plan, as the table on the right side of the join is still t1. | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t2 INNER STRAIGHT JOIN t1 ON t1.x = t2.z | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• hash join (inner) | ||
│ columns: (x, y, z, x) | ||
│ estimated row count: 990 | ||
│ equality: (z) = (x) | ||
│ right cols are key | ||
│ | ||
├── • scan | ||
│ columns: (x, y, z) | ||
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago) | ||
│ table: t2@t2_pkey | ||
│ spans: FULL SCAN | ||
│ | ||
└── • scan | ||
columns: (x) | ||
estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
table: t1@t1_pkey | ||
spans: FULL SCAN | ||
|
||
# Now, the best plan (hash join into t1) should no longer be picked, as t2 is now on the right; the hash join keeps t1 on the left and t2 on the right. | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER STRAIGHT JOIN t2 ON t1.x = t2.z | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• hash join (inner) | ||
│ columns: (x, x, y, z) | ||
│ estimated row count: 990 | ||
│ equality: (x) = (z) | ||
│ left cols are key | ||
│ | ||
├── • scan | ||
│ columns: (x) | ||
│ estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
│ table: t1@t1_pkey | ||
│ spans: FULL SCAN | ||
│ | ||
└── • scan | ||
columns: (x, y, z) | ||
estimated row count: 10,000 (100% of the table; stats collected <hidden> ago) | ||
table: t2@t2_pkey | ||
spans: FULL SCAN | ||
|
||
# -------------------------------------------------- | ||
# LEFT JOIN | ||
# -------------------------------------------------- | ||
|
||
# The best plan should be a (commuted) right outer hash join into t1. | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t1 LEFT JOIN t2 ON t1.x = t2.z | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• hash join (right outer) | ||
│ columns: (x, x, y, z) | ||
│ estimated row count: 1,000 | ||
│ equality: (z) = (x) | ||
│ right cols are key | ||
│ | ||
├── • scan | ||
│ columns: (x, y, z) | ||
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago) | ||
│ table: t2@t2_pkey | ||
│ spans: FULL SCAN | ||
│ | ||
└── • scan | ||
columns: (x) | ||
estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
table: t1@t1_pkey | ||
spans: FULL SCAN | ||
|
||
# Now, the best plan should no longer be picked, as the STRAIGHT hint forces the join order (t1 left, t2 right), giving a left outer hash join. | ||
query T | ||
EXPLAIN (VERBOSE) SELECT * FROM t1 LEFT STRAIGHT JOIN t2 ON t1.x = t2.z | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• hash join (left outer) | ||
│ columns: (x, x, y, z) | ||
│ estimated row count: 1,000 | ||
│ equality: (x) = (z) | ||
│ left cols are key | ||
│ | ||
├── • scan | ||
│ columns: (x) | ||
│ estimated row count: 100 (100% of the table; stats collected <hidden> ago) | ||
│ table: t1@t1_pkey | ||
│ spans: FULL SCAN | ||
│ | ||
└── • scan | ||
columns: (x, y, z) | ||
estimated row count: 10,000 (100% of the table; stats collected <hidden> ago) | ||
table: t2@t2_pkey | ||
spans: FULL SCAN |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.