Skip to content

Commit

Permalink
opt: add lock operator
Browse files Browse the repository at this point in the history
Add a new implementation of `SELECT FOR UPDATE` and `SELECT FOR SHARE`
statements. Instead of locking during the initial row fetch, this new
implementation constructs a `Lock` operator at the top of the query plan
which performs the locking phase using a locking semi-join lookup.

During optbuilder we build plans with both `Lock` operators and
initial-row-fetch locking. During execbuilder we decide which
implementation to use based on the isolation level and whether
`optimizer_use_lock_op_for_serializable` is set. If the new
implementation is chosen, `Lock` operators become locking
semi-LookupJoins.

In some cases these new plans will have superfluous lookup joins. A
future PR will optimize away some of these superfluous lookup joins.

Fixes: #57031, #75457

Epic: CRDB-25322

Release note (sql change): Add a new session variable,
`optimizer_use_lock_op_for_serializable`, which when set enables a new
implementation of `SELECT FOR UPDATE`. This new implementation of
`SELECT FOR UPDATE` acquires row locks *after* any joins and filtering,
and always acquires row locks on the primary index of the table being
locked. This more closely matches `SELECT FOR UPDATE` behavior in
PostgreSQL, but at the cost of more round trips from gateway node to
replica leaseholder.

Under read committed isolation (and other isolation levels weaker than
serializable) we will always use this new implementation of `SELECT FOR
UPDATE` regardless of the value of
`optimizer_use_lock_op_for_serializable` to ensure correctness.
  • Loading branch information
michae2 committed Oct 9, 2023
1 parent 4cef6e7 commit aae01f9
Show file tree
Hide file tree
Showing 33 changed files with 2,351 additions and 124 deletions.
7 changes: 7 additions & 0 deletions pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Expand Up @@ -3667,6 +3667,10 @@ func (m *sessionDataMutator) SetUnsafeSettingInterlockKey(val string) {
m.data.UnsafeSettingInterlockKey = val
}

// SetOptimizerUseLockOpForSerializable stores val in the session data's
// OptimizerUseLockOpForSerializable field.
// NOTE(review): presumably backs the optimizer_use_lock_op_for_serializable
// session variable; confirm against the session variable definition.
func (m *sessionDataMutator) SetOptimizerUseLockOpForSerializable(val bool) {
m.data.OptimizerUseLockOpForSerializable = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Expand Up @@ -5568,6 +5568,7 @@ optimizer_use_improved_disjunction_stats on
optimizer_use_improved_join_elimination on
optimizer_use_improved_split_disjunction_for_joins on
optimizer_use_limit_ordering_for_streaming_group_by on
optimizer_use_lock_op_for_serializable off
optimizer_use_multicol_stats on
optimizer_use_not_visible_indexes off
override_multi_region_zone_config off
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Expand Up @@ -2886,6 +2886,7 @@ optimizer_use_improved_disjunction_stats on N
optimizer_use_improved_join_elimination on NULL NULL NULL string
optimizer_use_improved_split_disjunction_for_joins on NULL NULL NULL string
optimizer_use_limit_ordering_for_streaming_group_by on NULL NULL NULL string
optimizer_use_lock_op_for_serializable off NULL NULL NULL string
optimizer_use_multicol_stats on NULL NULL NULL string
optimizer_use_not_visible_indexes off NULL NULL NULL string
override_multi_region_zone_config off NULL NULL NULL string
Expand Down Expand Up @@ -3048,6 +3049,7 @@ optimizer_use_improved_disjunction_stats on N
optimizer_use_improved_join_elimination on NULL user NULL on on
optimizer_use_improved_split_disjunction_for_joins on NULL user NULL on on
optimizer_use_limit_ordering_for_streaming_group_by on NULL user NULL on on
optimizer_use_lock_op_for_serializable off NULL user NULL off off
optimizer_use_multicol_stats on NULL user NULL on on
optimizer_use_not_visible_indexes off NULL user NULL off off
override_multi_region_zone_config off NULL user NULL off off
Expand Down Expand Up @@ -3209,6 +3211,7 @@ optimizer_use_improved_disjunction_stats NULL NULL NULL
optimizer_use_improved_join_elimination NULL NULL NULL NULL NULL
optimizer_use_improved_split_disjunction_for_joins NULL NULL NULL NULL NULL
optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL
optimizer_use_lock_op_for_serializable NULL NULL NULL NULL NULL
optimizer_use_multicol_stats NULL NULL NULL NULL NULL
optimizer_use_not_visible_indexes NULL NULL NULL NULL NULL
override_multi_region_zone_config NULL NULL NULL NULL NULL
Expand Down
182 changes: 182 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/select_for_update_read_committed
@@ -0,0 +1,182 @@
# LogicTest: !local-mixed-22.2-23.1

statement ok
SET CLUSTER SETTING sql.txn.read_committed_syntax.enabled = true

statement ok
SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL READ COMMITTED

statement ok
CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT, INDEX (b), FAMILY (a, b, c))

statement ok
INSERT INTO abc VALUES (1, 10, 100), (2, 20, 200), (3, 30, 300)

statement ok
CREATE TABLE bcd (b INT PRIMARY KEY, c INT, d INT, INDEX (c), FAMILY (b, c, d))

statement ok
INSERT INTO bcd VALUES (20, 200, 2000), (30, 300, 3000), (40, 400, 4000)

statement ok
GRANT ALL on abc TO testuser

statement ok
GRANT ALL on bcd TO testuser

# First, hold locks on some rows of abc and bcd. We'll update abc at the end.

user testuser

statement ok
BEGIN

query III rowsort
SELECT * FROM abc WHERE a != 3 FOR UPDATE
----
1 10 100
2 20 200

query III
SELECT * FROM bcd ORDER BY c DESC LIMIT 2 FOR SHARE
----
40 400 4000
30 300 3000

# Then ensure we wait on the locks and see the updated rows after commit.

user root

# Normal reads do not block.

query III rowsort
SELECT * FROM abc
----
1 10 100
2 20 200
3 30 300

query III rowsort
SELECT * FROM bcd
----
20 200 2000
30 300 3000
40 400 4000

# SKIP LOCKED reads do not block.

query III rowsort
SELECT * FROM abc FOR UPDATE SKIP LOCKED
----
3 30 300

query III rowsort
SELECT * FROM bcd FOR UPDATE SKIP LOCKED
----
20 200 2000
30 300 3000
40 400 4000

# Shared reads block on exclusive locks but not on shared locks.

query III async,rowsort q00
SELECT * FROM abc FOR SHARE
----
1 11 101
2 21 201
3 30 300

query III rowsort
SELECT * FROM bcd FOR SHARE
----
20 200 2000
30 300 3000
40 400 4000

# Exclusive reads block on both.

query III async,rowsort q01
SELECT * FROM abc FOR UPDATE
----
1 11 101
2 21 201
3 30 300

query III async,rowsort q02
SELECT * FROM bcd FOR UPDATE
----
20 200 2000
30 300 3000
40 400 4000

# Try more exclusive-locking queries.

query I async q03
SELECT a FROM abc WHERE a = 2 FOR UPDATE
----
2

query I async q04
SELECT b FROM abc WHERE a = 2 FOR UPDATE
----
21

query I async q05
SELECT c FROM abc WHERE a = 2 FOR UPDATE
----
201

query I async q06
SELECT c FROM abc ORDER BY a DESC LIMIT 2 FOR UPDATE
----
300
201

query I async,rowsort q07
SELECT a + b + c FROM abc FOR UPDATE
----
113
224
333

# Try some joins

query IIIII async q08
SELECT * FROM abc JOIN bcd USING (b) FOR SHARE
----
30 3 300 300 3000

query IIIII async q09
SELECT * FROM abc JOIN bcd USING (c) FOR UPDATE
----
300 3 30 30 3000

user testuser

statement ok
UPDATE abc SET b = b + 1, c = c + 1 WHERE a != 3

statement ok
COMMIT

user root

awaitquery q00

awaitquery q01

awaitquery q02

awaitquery q03

awaitquery q04

awaitquery q05

awaitquery q06

awaitquery q07

awaitquery q08

awaitquery q09
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Expand Up @@ -124,6 +124,7 @@ optimizer_use_improved_disjunction_stats on
optimizer_use_improved_join_elimination on
optimizer_use_improved_split_disjunction_for_joins on
optimizer_use_limit_ordering_for_streaming_group_by on
optimizer_use_lock_op_for_serializable off
optimizer_use_multicol_stats on
optimizer_use_not_visible_indexes off
override_multi_region_zone_config off
Expand Down
7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist-disk/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/local-vec-off/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/local/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions pkg/sql/opt/exec/execbuilder/mutation.go
Expand Up @@ -1134,3 +1134,29 @@ func unwrapProjectExprs(input memo.RelExpr) memo.RelExpr {
}
return input
}

// buildLock builds the execution plan for a Lock expression.
//
// The lock's Locking property is first resolved through buildLocking. If the
// resolved value reports that no locking is required, the Lock expression is
// elided and the plan for its input is returned directly. Otherwise the Lock
// is expressed as a semi lookup join from the input into the primary index of
// the locked table, carrying the resolved locking, and that join is built in
// its place.
func (b *Builder) buildLock(lock *memo.LockExpr) (execPlan, error) {
	resolved, err := b.buildLocking(lock.Locking)
	if err != nil {
		return execPlan{}, err
	}

	// No locking to perform: skip the lookup join entirely and build the
	// input on its own.
	if !resolved.IsLocking() {
		return b.buildRelational(lock.Input)
	}

	// Describe a semi lookup join against the primary index, keyed on the
	// lock's key columns, that applies the resolved locking.
	private := memo.LookupJoinPrivate{
		JoinType:              opt.SemiJoinOp,
		Table:                 lock.Table,
		Index:                 cat.PrimaryIndex,
		KeyCols:               lock.KeyCols,
		Cols:                  lock.Cols,
		LookupColsAreTableKey: true,
		Locking:               resolved,
	}
	semiJoin := &memo.LookupJoinExpr{
		Input:             lock.Input,
		LookupJoinPrivate: private,
	}

	// Copy the Lock expression's group onto the join before building it.
	semiJoin.CopyGroup(lock)
	return b.buildLookupJoin(semiJoin)
}
6 changes: 5 additions & 1 deletion pkg/sql/opt/exec/execbuilder/relational.go
Expand Up @@ -292,6 +292,9 @@ func (b *Builder) buildRelational(e memo.RelExpr) (execPlan, error) {
case *memo.DeleteExpr:
ep, err = b.buildDelete(t)

case *memo.LockExpr:
ep, err = b.buildLock(t)

case *memo.CreateTableExpr:
ep, err = b.buildCreateTable(t)

Expand Down Expand Up @@ -3626,7 +3629,8 @@ func (b *Builder) statementTag(expr memo.RelExpr) string {
switch expr.Op() {
case opt.OpaqueRelOp, opt.OpaqueMutationOp, opt.OpaqueDDLOp:
return expr.Private().(*memo.OpaqueRelPrivate).Metadata.String()

case opt.LockOp:
return "SELECT " + expr.Private().(*memo.LockPrivate).Locking.Strength.String()
default:
return expr.Op().SyntaxTag()
}
Expand Down

0 comments on commit aae01f9

Please sign in to comment.