cockroachdb · craig · Feb 5, 2024 · Dec 11, 2023
diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf
@@ -1430,6 +1430,7 @@ unreserved_keyword ::=
 	| 'STORE'
 	| 'STORED'
 	| 'STORING'
+	| 'STRAIGHT'
 	| 'STREAM'
 	| 'STRICT'
 	| 'SUBSCRIPTION'
@@ -4057,6 +4058,7 @@ bare_label_keywords ::=
 	| 'STORE'
 	| 'STORED'
 	| 'STORING'
+	| 'STRAIGHT'
 	| 'STREAM'
 	| 'STRICT'
 	| 'STRING'
@@ -4167,6 +4169,7 @@ opt_join_hint ::=
 	| 'MERGE'
 	| 'LOOKUP'
 	| 'INVERTED'
+	| 'STRAIGHT'
 	| 
 
 join_type ::=

@@ -0,0 +1,273 @@
+# LogicTest: local
+
+statement ok
+CREATE TABLE t1 (x INT, PRIMARY KEY (x))
+
+statement ok
+CREATE TABLE t2 (x INT, y INT, z INT, PRIMARY KEY (x), INDEX idx_y (y))
+
+# Set up the statistics as if t1 is much smaller than t2.
+statement ok
+ALTER TABLE t1 INJECT STATISTICS '[
+  {
+    "columns": ["x"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 100,
+    "distinct_count": 100,
+    "null_count": 0
+  }
+]'
+
+statement ok
+ALTER TABLE t2 INJECT STATISTICS '[
+  {
+    "columns": ["y"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 10000,
+    "distinct_count": 10000,
+    "null_count": 0
+  },
+  {
+    "columns": ["z"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 10000,
+    "distinct_count": 10000,
+    "null_count": 0
+  }
+]'
+
+# --------------------------------------------------
+# INNER JOIN
+# --------------------------------------------------
+
+# The best plan should be a lookup join into t2 (right).
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.y
+----
+distribution: local
+vectorized: true
+·
+• lookup join (inner)
+│ columns: (x, x, y, z)
+│ estimated row count: 100
+│ table: t2@t2_pkey
+│ equality: (x) = (x)
+│ equality cols are key
+│
+└── • lookup join (inner)
+    │ columns: (x, x, y)
+    │ estimated row count: 100
+    │ table: t2@idx_y
+    │ equality: (x) = (y)
+    │
+    └── • scan
+          columns: (x)
+          estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+          table: t1@t1_pkey
+          spans: FULL SCAN
+
+# Should not change the plan, as the table on the right side of the join is still t2.
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t1 INNER STRAIGHT JOIN t2 ON t1.x = t2.y
+----
+distribution: local
+vectorized: true
+·
+• lookup join (inner)
+│ columns: (x, x, y, z)
+│ estimated row count: 100
+│ table: t2@t2_pkey
+│ equality: (x) = (x)
+│ equality cols are key
+│
+└── • lookup join (inner)
+    │ columns: (x, x, y)
+    │ estimated row count: 100
+    │ table: t2@idx_y
+    │ equality: (x) = (y)
+    │
+    └── • scan
+          columns: (x)
+          estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+          table: t1@t1_pkey
+          spans: FULL SCAN
+
+# Now the best plan (lookup join into t2) can no longer be picked because t1 is on the right; a hash join is planned instead.
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t2 INNER STRAIGHT JOIN t1 ON t1.x = t2.y
+----
+distribution: local
+vectorized: true
+·
+• hash join (inner)
+│ columns: (x, y, z, x)
+│ estimated row count: 100
+│ equality: (y) = (x)
+│ right cols are key
+│
+├── • scan
+│     columns: (x, y, z)
+│     estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
+│     table: t2@t2_pkey
+│     spans: FULL SCAN
+│
+└── • scan
+      columns: (x)
+      estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+      table: t1@t1_pkey
+      spans: FULL SCAN
+
+
+# The best plan should be a hash join into t1 (smaller table).
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.z
+----
+distribution: local
+vectorized: true
+·
+• hash join (inner)
+│ columns: (x, x, y, z)
+│ estimated row count: 100
+│ equality: (z) = (x)
+│ right cols are key
+│
+├── • scan
+│     columns: (x, y, z)
+│     estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
+│     table: t2@t2_pkey
+│     spans: FULL SCAN
+│
+└── • scan
+      columns: (x)
+      estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+      table: t1@t1_pkey
+      spans: FULL SCAN
+
+# Should not change the plan, as the table on the right side of the join is t1.
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t2 INNER STRAIGHT JOIN t1 ON t1.x = t2.z
+----
+distribution: local
+vectorized: true
+·
+• hash join (inner)
+│ columns: (x, y, z, x)
+│ estimated row count: 100
+│ equality: (z) = (x)
+│ right cols are key
+│
+├── • scan
+│     columns: (x, y, z)
+│     estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
+│     table: t2@t2_pkey
+│     spans: FULL SCAN
+│
+└── • scan
+      columns: (x)
+      estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+      table: t1@t1_pkey
+      spans: FULL SCAN
+
+# Now the best plan (hash join into t1) can no longer be picked because t2 is on the right; the hash join keeps t1 on the left.
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t1 INNER STRAIGHT JOIN t2 ON t1.x = t2.z
+----
+distribution: local
+vectorized: true
+·
+• hash join (inner)
+│ columns: (x, x, y, z)
+│ estimated row count: 100
+│ equality: (x) = (z)
+│ left cols are key
+│
+├── • scan
+│     columns: (x)
+│     estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+│     table: t1@t1_pkey
+│     spans: FULL SCAN
+│
+└── • scan
+      columns: (x, y, z)
+      estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
+      table: t2@t2_pkey
+      spans: FULL SCAN
+
+# --------------------------------------------------
+# LEFT JOIN
+# --------------------------------------------------
+
+# The best plan should be a (commuted) right outer hash join into t1.
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t1 LEFT JOIN t2 ON t1.x = t2.z
+----
+distribution: local
+vectorized: true
+·
+• hash join (right outer)
+│ columns: (x, x, y, z)
+│ estimated row count: 100
+│ equality: (z) = (x)
+│ right cols are key
+│
+├── • scan
+│     columns: (x, y, z)
+│     estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
+│     table: t2@t2_pkey
+│     spans: FULL SCAN
+│
+└── • scan
+      columns: (x)
+      estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+      table: t1@t1_pkey
+      spans: FULL SCAN
+
+# Now the commuted plan can no longer be picked, as the hint forces the join order: t1 stays on the left of a left outer hash join.
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t1 LEFT STRAIGHT JOIN t2 ON t1.x = t2.z
+----
+distribution: local
+vectorized: true
+·
+• hash join (left outer)
+│ columns: (x, x, y, z)
+│ estimated row count: 100
+│ equality: (x) = (z)
+│ left cols are key
+│
+├── • scan
+│     columns: (x)
+│     estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+│     table: t1@t1_pkey
+│     spans: FULL SCAN
+│
+└── • scan
+      columns: (x, y, z)
+      estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
+      table: t2@t2_pkey
+      spans: FULL SCAN
+
+# Should keep the same join order, but as a right outer hash join (the row count estimate now follows t2).
+query T
+EXPLAIN (VERBOSE) SELECT * FROM t1 RIGHT STRAIGHT JOIN t2 ON t1.x = t2.z
+----
+distribution: local
+vectorized: true
+·
+• hash join (right outer)
+│ columns: (x, x, y, z)
+│ estimated row count: 10,000
+│ equality: (x) = (z)
+│ left cols are key
+│
+├── • scan
+│     columns: (x)
+│     estimated row count: 100 (100% of the table; stats collected <hidden> ago)
+│     table: t1@t1_pkey
+│     spans: FULL SCAN
+│
+└── • scan
+      columns: (x, y, z)
+      estimated row count: 10,000 (100% of the table; stats collected <hidden> ago)
+      table: t2@t2_pkey
+      spans: FULL SCAN
@@ -518,6 +518,11 @@ const (
 
 	// AllowOnlyMergeJoin has all "disallow" flags set except DisallowMergeJoin.
 	AllowOnlyMergeJoin = disallowAll ^ DisallowMergeJoin
+
+	// AllowAllJoinsIntoRight has all "disallow" flags set except
+	// DisallowHashJoinStoreRight, DisallowLookupJoinIntoRight,
+	// DisallowInvertedJoinIntoRight, and DisallowMergeJoin.
+	AllowAllJoinsIntoRight = disallowAll ^ DisallowHashJoinStoreRight ^ DisallowLookupJoinIntoRight ^ DisallowInvertedJoinIntoRight ^ DisallowMergeJoin
 )
 
 var joinFlagStr = map[JoinFlags]string{

@@ -94,6 +94,10 @@ func (b *Builder) buildJoin(
 		telemetry.Inc(sqltelemetry.MergeJoinHintUseCounter)
 		flags = memo.AllowOnlyMergeJoin
 
+	case tree.AstStraight:
+		telemetry.Inc(sqltelemetry.StraightJoinHintUseCounter)
+		flags = memo.AllowAllJoinsIntoRight
+
 	default:
 		panic(pgerror.Newf(
 			pgcode.FeatureNotSupported, "join hint %s not supported", join.Hint,

@@ -2436,6 +2436,21 @@ project
       └── filters
            └── x:1 = y:5
 
+build
+SELECT * FROM onecolumn AS a(x) INNER STRAIGHT JOIN onecolumn AS b(y) ON a.x = b.y
+----
+project
+ ├── columns: x:1!null y:5!null
+ └── inner-join (hash)
+      ├── columns: x:1!null a.rowid:2!null a.crdb_internal_mvcc_timestamp:3 a.tableoid:4 y:5!null b.rowid:6!null b.crdb_internal_mvcc_timestamp:7 b.tableoid:8
+      ├── flags: disallow hash join (store left side) and lookup join (into left side) and inverted join (into left side)
+      ├── scan onecolumn [as=a]
+      │    └── columns: x:1 a.rowid:2!null a.crdb_internal_mvcc_timestamp:3 a.tableoid:4
+      ├── scan onecolumn [as=b]
+      │    └── columns: y:5 b.rowid:6!null b.crdb_internal_mvcc_timestamp:7 b.tableoid:8
+      └── filters
+           └── x:1 = y:5
+
 build
 SELECT * FROM onecolumn AS a NATURAL LEFT LOOKUP JOIN onecolumn as b USING(x)
 ----