Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql,opt: add hint for STRAIGHT join #116013

Merged
merged 1 commit into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -1430,6 +1430,7 @@ unreserved_keyword ::=
| 'STORE'
| 'STORED'
| 'STORING'
| 'STRAIGHT'
| 'STREAM'
| 'STRICT'
| 'SUBSCRIPTION'
Expand Down Expand Up @@ -4057,6 +4058,7 @@ bare_label_keywords ::=
| 'STORE'
| 'STORED'
| 'STORING'
| 'STRAIGHT'
| 'STREAM'
| 'STRICT'
| 'STRING'
Expand Down Expand Up @@ -4167,6 +4169,7 @@ opt_join_hint ::=
| 'MERGE'
| 'LOOKUP'
| 'INVERTED'
| 'STRAIGHT'
|

join_type ::=
Expand Down
273 changes: 273 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/straight_join
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
# LogicTest: local

statement ok
CREATE TABLE t1 (x INT, PRIMARY KEY (x))

statement ok
CREATE TABLE t2 (x INT, y INT, z INT, PRIMARY KEY (x), INDEX idx_y (y))

# Set up the statistics as if t1 is much smaller than t2.
statement ok
ALTER TABLE t1 INJECT STATISTICS '[
{
"columns": ["x"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 100,
"distinct_count": 100,
"null_count": 0
}
]'

statement ok
ALTER TABLE t2 INJECT STATISTICS '[
{
"columns": ["y"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 10000,
"distinct_count": 10000,
"null_count": 0
},
{
"columns": ["z"],
"created_at": "2018-01-01 1:00:00.00000+00:00",
"row_count": 10000,
"distinct_count": 10000,
"null_count": 0
}
]'

# --------------------------------------------------
# INNER JOIN
# --------------------------------------------------

# The best plan should be a lookup join into t2 (right).
query T
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.y
----
distribution: local
vectorized: true
·
• lookup join (inner)
│ columns: (x, x, y, z)
│ estimated row count: 100
│ table: t2@t2_pkey
│ equality: (x) = (x)
│ equality cols are key
└── • lookup join (inner)
│ columns: (x, x, y)
│ estimated row count: 100
│ table: t2@idx_y
│ equality: (x) = (y)
└── • scan
columns: (x)
estimated row count: 100 (100% of the table; stats collected <hidden> ago)
table: t1@t1_pkey
spans: FULL SCAN

# Should not change the plan, as the table on the right side of the join is still t2.
query T
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER STRAIGHT JOIN t2 ON t1.x = t2.y
----
distribution: local
vectorized: true
·
• lookup join (inner)
│ columns: (x, x, y, z)
│ estimated row count: 100
│ table: t2@t2_pkey
│ equality: (x) = (x)
│ equality cols are key
└── • lookup join (inner)
│ columns: (x, x, y)
│ estimated row count: 100
│ table: t2@idx_y
│ equality: (x) = (y)
└── • scan
columns: (x)
estimated row count: 100 (100% of the table; stats collected <hidden> ago)
table: t1@t1_pkey
spans: FULL SCAN

# Now, the best plan (lookup join into t2) should no longer be picked as t1 is now on the right.
query T
EXPLAIN (VERBOSE) SELECT * FROM t2 INNER STRAIGHT JOIN t1 ON t1.x = t2.y
----
distribution: local
vectorized: true
·
• hash join (inner)
│ columns: (x, y, z, x)
│ estimated row count: 100
│ equality: (y) = (x)
│ right cols are key
├── • scan
│ columns: (x, y, z)
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
│ table: t2@t2_pkey
│ spans: FULL SCAN
└── • scan
columns: (x)
estimated row count: 100 (100% of the table; stats collected <hidden> ago)
table: t1@t1_pkey
spans: FULL SCAN


# The best plan should be a hash join into t1 (smaller table).
query T
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.z
----
distribution: local
vectorized: true
·
• hash join (inner)
│ columns: (x, x, y, z)
│ estimated row count: 100
│ equality: (z) = (x)
│ right cols are key
├── • scan
│ columns: (x, y, z)
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
│ table: t2@t2_pkey
│ spans: FULL SCAN
└── • scan
columns: (x)
estimated row count: 100 (100% of the table; stats collected <hidden> ago)
table: t1@t1_pkey
spans: FULL SCAN

# Should not change the plan, as the table on the right side of the join is t1.
query T
EXPLAIN (VERBOSE) SELECT * FROM t2 INNER STRAIGHT JOIN t1 ON t1.x = t2.z
----
distribution: local
vectorized: true
·
• hash join (inner)
│ columns: (x, y, z, x)
│ estimated row count: 100
│ equality: (z) = (x)
│ right cols are key
├── • scan
│ columns: (x, y, z)
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
│ table: t2@t2_pkey
│ spans: FULL SCAN
└── • scan
columns: (x)
estimated row count: 100 (100% of the table; stats collected <hidden> ago)
table: t1@t1_pkey
spans: FULL SCAN

# Now, the best plan (hash join into t1) should no longer be picked as t2 is now on the right.
query T
EXPLAIN (VERBOSE) SELECT * FROM t1 INNER STRAIGHT JOIN t2 ON t1.x = t2.z
----
distribution: local
vectorized: true
·
• hash join (inner)
│ columns: (x, x, y, z)
│ estimated row count: 100
│ equality: (x) = (z)
│ left cols are key
├── • scan
│ columns: (x)
│ estimated row count: 100 (100% of the table; stats collected <hidden> ago)
│ table: t1@t1_pkey
│ spans: FULL SCAN
└── • scan
columns: (x, y, z)
estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
table: t2@t2_pkey
spans: FULL SCAN

# --------------------------------------------------
# LEFT JOIN
# --------------------------------------------------

# The best plan should be a (commuted) right outer hash join into t1.
query T
EXPLAIN (VERBOSE) SELECT * FROM t1 LEFT JOIN t2 ON t1.x = t2.z
----
distribution: local
vectorized: true
·
• hash join (right outer)
│ columns: (x, x, y, z)
│ estimated row count: 100
│ equality: (z) = (x)
│ right cols are key
├── • scan
│ columns: (x, y, z)
│ estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
│ table: t2@t2_pkey
│ spans: FULL SCAN
└── • scan
columns: (x)
estimated row count: 100 (100% of the table; stats collected <hidden> ago)
table: t1@t1_pkey
spans: FULL SCAN

# Now, the best plan should no longer be picked, as we're forcing the join order.
query T
EXPLAIN (VERBOSE) SELECT * FROM t1 LEFT STRAIGHT JOIN t2 ON t1.x = t2.z
----
distribution: local
vectorized: true
·
• hash join (left outer)
│ columns: (x, x, y, z)
│ estimated row count: 100
│ equality: (x) = (z)
│ left cols are key
├── • scan
│ columns: (x)
│ estimated row count: 100 (100% of the table; stats collected <hidden> ago)
│ table: t1@t1_pkey
│ spans: FULL SCAN
└── • scan
columns: (x, y, z)
estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
table: t2@t2_pkey
spans: FULL SCAN

# Should produce the same plan except with a right outer hash join.
query T
EXPLAIN (VERBOSE) SELECT * FROM t1 RIGHT STRAIGHT JOIN t2 ON t1.x = t2.z
----
distribution: local
vectorized: true
·
• hash join (right outer)
│ columns: (x, x, y, z)
│ estimated row count: 10,000
│ equality: (x) = (z)
│ left cols are key
├── • scan
│ columns: (x)
│ estimated row count: 100 (100% of the table; stats collected <hidden> ago)
│ table: t1@t1_pkey
│ spans: FULL SCAN
└── • scan
columns: (x, y, z)
estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
table: t2@t2_pkey
spans: FULL SCAN
7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/sql/opt/memo/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,11 @@ const (

// AllowOnlyMergeJoin has all "disallow" flags set except DisallowMergeJoin.
AllowOnlyMergeJoin = disallowAll ^ DisallowMergeJoin

// AllowAllJoinsIntoRight has all "disallow" flags set except
// DisallowHashJoinStoreRight, DisallowLookupJoinIntoRight,
// DisallowInvertedJoinIntoRight, and DisallowMergeJoin.
AllowAllJoinsIntoRight = disallowAll ^ DisallowHashJoinStoreRight ^ DisallowLookupJoinIntoRight ^ DisallowInvertedJoinIntoRight ^ DisallowMergeJoin
)

var joinFlagStr = map[JoinFlags]string{
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/opt/optbuilder/join.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ func (b *Builder) buildJoin(
telemetry.Inc(sqltelemetry.MergeJoinHintUseCounter)
flags = memo.AllowOnlyMergeJoin

case tree.AstStraight:
telemetry.Inc(sqltelemetry.StraightJoinHintUseCounter)
flags = memo.AllowAllJoinsIntoRight

default:
panic(pgerror.Newf(
pgcode.FeatureNotSupported, "join hint %s not supported", join.Hint,
Expand Down
15 changes: 15 additions & 0 deletions pkg/sql/opt/optbuilder/testdata/join
Original file line number Diff line number Diff line change
Expand Up @@ -2436,6 +2436,21 @@ project
└── filters
└── x:1 = y:5

build
SELECT * FROM onecolumn AS a(x) INNER STRAIGHT JOIN onecolumn AS b(y) ON a.x = b.y
----
project
├── columns: x:1!null y:5!null
└── inner-join (hash)
├── columns: x:1!null a.rowid:2!null a.crdb_internal_mvcc_timestamp:3 a.tableoid:4 y:5!null b.rowid:6!null b.crdb_internal_mvcc_timestamp:7 b.tableoid:8
├── flags: disallow hash join (store left side) and lookup join (into left side) and inverted join (into left side)
├── scan onecolumn [as=a]
│ └── columns: x:1 a.rowid:2!null a.crdb_internal_mvcc_timestamp:3 a.tableoid:4
├── scan onecolumn [as=b]
│ └── columns: y:5 b.rowid:6!null b.crdb_internal_mvcc_timestamp:7 b.tableoid:8
└── filters
└── x:1 = y:5

build
SELECT * FROM onecolumn AS a NATURAL LEFT LOOKUP JOIN onecolumn as b USING(x)
----
Expand Down