Skip to content

Commit

Permalink
Merge #114652
Browse files Browse the repository at this point in the history
114652: sql: don't bump RC txn read timestamp before commit r=nvanbenschoten a=nvanbenschoten

Fixes #109628.

This commit removes the bumping of the read committed transactions' read timestamp in `connExecutor.commitSQLTransactionInternal`. Bumping the transaction's external read timestamp is not needed before committing, and it causes the transaction to commit at a higher timestamp than necessary. On highly contended workloads like the one from #109628, this can cause unnecessary contention by inflating the contention footprint of each transaction (i.e. the duration measured in the MVCC time domain that the transaction holds locks).

By not bumping the read timestamp immediately before committed, we improve the performance of contended workloads. For example, on the workload from #109628, we see the following improvement:

## Summary
```
Serializable:            232.6 tps
Read Committed (before): 225.3 tps
Read Committed (after):  236.0 tps
```
Read Committed improves by **4.7%** and is now **1.5%** faster than Serializable on the workload.

## Raw
```
### Serializable

transaction type: tpcb-cockroach.sql                  transaction type: tpcb-cockroach.sql                  transaction type: tpcb-cockroach.sql
scaling factor: 10                                    scaling factor: 10                                    scaling factor: 10
query mode: simple                                    query mode: simple                                    query mode: simple
number of clients: 25                                 number of clients: 25                                 number of clients: 25
number of threads: 10                                 number of threads: 10                                 number of threads: 10
duration: 120 s                                       duration: 120 s                                       duration: 120 s
number of transactions actually processed: 27935      number of transactions actually processed: 27584      number of transactions actually processed: 28380
number of failed transactions: 0 (0.000%)             number of failed transactions: 0 (0.000%)             number of failed transactions: 0 (0.000%)
number of serialization failures: 0 (0.000%)          number of serialization failures: 0 (0.000%)          number of serialization failures: 0 (0.000%)
number of deadlock failures: 0 (0.000%)               number of deadlock failures: 0 (0.000%)               number of deadlock failures: 0 (0.000%)
number of transactions retried: 0 (0.000%)            number of transactions retried: 0 (0.000%)            number of transactions retried: 0 (0.000%)
total number of retries: 0                            total number of retries: 0                            total number of retries: 0
latency average = 107.483 ms                          latency average = 108.829 ms                          latency average = 105.776 ms
latency stddev = 264.163 ms                           latency stddev = 263.132 ms                           latency stddev = 244.713 ms
initial connection time = 12.315 ms                   initial connection time = 11.692 ms                   initial connection time = 9.448 ms
tps = 232.157370 (without initial connection time)    tps = 229.458565 (without initial connection time)    tps = 236.039695 (without initial connection time)


### Read Committed (before)

transaction type: tpcb-cockroach.sql                  transaction type: tpcb-cockroach.sql                  transaction type: tpcb-cockroach.sql
scaling factor: 10                                    scaling factor: 10                                    scaling factor: 10
query mode: simple                                    query mode: simple                                    query mode: simple
number of clients: 25                                 number of clients: 25                                 number of clients: 25
number of threads: 10                                 number of threads: 10                                 number of threads: 10
duration: 120 s                                       duration: 120 s                                       duration: 120 s
number of transactions actually processed: 27220      number of transactions actually processed: 27143      number of transactions actually processed: 26966
number of failed transactions: 0 (0.000%)             number of failed transactions: 0 (0.000%)             number of failed transactions: 0 (0.000%)
number of serialization failures: 0 (0.000%)          number of serialization failures: 0 (0.000%)          number of serialization failures: 0 (0.000%)
number of deadlock failures: 0 (0.000%)               number of deadlock failures: 0 (0.000%)               number of deadlock failures: 0 (0.000%)
number of transactions retried: 0 (0.000%)            number of transactions retried: 0 (0.000%)            number of transactions retried: 0 (0.000%)
total number of retries: 0                            total number of retries: 0                            total number of retries: 0
latency average = 110.293 ms                          latency average = 110.646 ms                          latency average = 111.354 ms
latency stddev = 173.640 ms                           latency stddev = 180.792 ms                           latency stddev = 179.226 ms
initial connection time = 8.911 ms                    initial connection time = 9.894 ms                    initial connection time = 10.605 ms
tps = 226.389120 (without initial connection time)    tps = 225.427664 (without initial connection time)    tps = 224.059547 (without initial connection time)


### Read Committed (after)

transaction type: tpcb-cockroach.sql                  transaction type: tpcb-cockroach.sql                  transaction type: tpcb-cockroach.sql
scaling factor: 10                                    scaling factor: 10                                    scaling factor: 10
query mode: simple                                    query mode: simple                                    query mode: simple
number of clients: 25                                 number of clients: 25                                 number of clients: 25
number of threads: 10                                 number of threads: 10                                 number of threads: 10
duration: 120 s                                       duration: 120 s                                       duration: 120 s
number of transactions actually processed: 28526      number of transactions actually processed: 28564      number of transactions actually processed: 28039
number of failed transactions: 0 (0.000%)             number of failed transactions: 0 (0.000%)             number of failed transactions: 0 (0.000%)
number of serialization failures: 0 (0.000%)          number of serialization failures: 0 (0.000%)          number of serialization failures: 0 (0.000%)
number of deadlock failures: 0 (0.000%)               number of deadlock failures: 0 (0.000%)               number of deadlock failures: 0 (0.000%)
number of transactions retried: 0 (0.000%)            number of transactions retried: 0 (0.000%)            number of transactions retried: 0 (0.000%)
total number of retries: 0                            total number of retries: 0                            total number of retries: 0
latency average = 105.221 ms                          latency average = 105.114 ms                          latency average = 107.065 ms
latency stddev = 196.386 ms                           latency stddev = 197.031 ms                           latency stddev = 203.784 ms
initial connection time = 12.549 ms                   initial connection time = 11.212 ms                   initial connection time = 7.715 ms
tps = 237.329979 (without initial connection time)    tps = 237.417802 (without initial connection time)    tps = 233.194327 (without initial connection time)
```

Release note: None

Co-authored-by: Nathan VanBenschoten <nvanbenschoten@gmail.com>
  • Loading branch information
craig[bot] and nvanbenschoten committed Nov 17, 2023
2 parents 321e608 + a012fbe commit 557264e
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 4 deletions.
17 changes: 13 additions & 4 deletions pkg/sql/conn_executor_exec.go
Expand Up @@ -1438,12 +1438,21 @@ func (ex *connExecutor) commitSQLTransactionInternal(ctx context.Context) error

ex.extraTxnState.prepStmtsNamespace.closeAllPortals(ctx, &ex.extraTxnState.prepStmtsNamespaceMemAcc)

// We need to step the transaction before committing if it has stepping
// enabled. If it doesn't have stepping enabled, then we just set the
// stepping mode back to what it was.
// We need to step the transaction's internal read sequence before committing
// if it has stepping enabled. If it doesn't have stepping enabled, then we
// just set the stepping mode back to what it was.
//
// Even if we do step the transaction's internal read sequence, we do not
// advance its external read timestamp (applicable only to read committed
// transactions). This is because doing so is not needed before committing,
// and it would cause the transaction to commit at a higher timestamp than
// necessary. On heavily contended workloads like the one from #109628, this
// can cause unnecessary write-write contention between transactions by
// inflating the contention footprint of each transaction (i.e. the duration
// measured in MVCC time that the transaction holds locks).
prevSteppingMode := ex.state.mu.txn.ConfigureStepping(ctx, kv.SteppingEnabled)
if prevSteppingMode == kv.SteppingEnabled {
if err := ex.state.mu.txn.Step(ctx, true /* allowReadTimestampStep */); err != nil {
if err := ex.state.mu.txn.Step(ctx, false /* allowReadTimestampStep */); err != nil {
return err
}
} else {
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/tests/BUILD.bazel
Expand Up @@ -124,6 +124,7 @@ go_test(
"//pkg/testutils/testcluster",
"//pkg/util/allstacks",
"//pkg/util/ctxgroup",
"//pkg/util/hlc",
"//pkg/util/ioctx",
"//pkg/util/json",
"//pkg/util/leaktest",
Expand Down
58 changes: 58 additions & 0 deletions pkg/sql/tests/read_committed_test.go
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -148,6 +149,63 @@ func TestReadCommittedStmtRetry(t *testing.T) {
require.True(t, sawWriteTooOldError.Load())
}

// TestReadCommittedReadTimestampNotSteppedOnCommit verifies that the read
// timestamp of a read committed transaction is stepped between SQL statements,
// but not before commit.
func TestReadCommittedReadTimestampNotSteppedOnCommit(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

// Keep track of the read timestamps of the read committed transaction during
// each KV operation.
var txnReadTimestamps []hlc.Timestamp
filterFunc := func(ctx context.Context, ba *kvpb.BatchRequest, _ *kvpb.BatchResponse) *kvpb.Error {
if ba.Txn == nil || ba.Txn.IsoLevel != isolation.ReadCommitted {
return nil
}
req := ba.Requests[0]
method := req.GetInner().Method()
if method == kvpb.ConditionalPut || (method == kvpb.EndTxn && req.GetEndTxn().IsParallelCommit()) {
txnReadTimestamps = append(txnReadTimestamps, ba.Txn.ReadTimestamp)
}
return nil
}

ctx := context.Background()
params := base.TestServerArgs{}
params.Knobs.Store = &kvserver.StoreTestingKnobs{
// NOTE: we use a TestingResponseFilter and not a TestingRequestFilter to
// avoid potential flakiness from requests which are redirected or retried.
TestingResponseFilter: filterFunc,
}
s, sqlDB, _ := serverutils.StartServer(t, params)
defer s.Stopper().Stop(ctx)

_, err := sqlDB.Exec(`SET CLUSTER SETTING sql.txn.read_committed_isolation.enabled = true`)
require.NoError(t, err)
_, err = sqlDB.Exec(`CREATE TABLE kv (k TEXT, v INT) WITH (sql_stats_automatic_collection_enabled = false);`)
require.NoError(t, err)

// Create a read committed transaction that writes to three rows in three
// different statements and then commits.
tx, err := sqlDB.BeginTx(ctx, &gosql.TxOptions{Isolation: gosql.LevelReadCommitted})
require.NoError(t, err)
_, err = tx.Exec(`INSERT INTO kv VALUES ('a', 1);`)
require.NoError(t, err)
_, err = tx.Exec(`INSERT INTO kv VALUES ('b', 2);`)
require.NoError(t, err)
_, err = tx.Exec(`INSERT INTO kv VALUES ('c', 3);`)
require.NoError(t, err)
require.NoError(t, tx.Commit())

// Verify that the transaction's read timestamp was not stepped on commit but
// was stepped between every other statement.
require.Len(t, txnReadTimestamps, 4)
require.True(t, txnReadTimestamps[0].Less(txnReadTimestamps[1]))
require.True(t, txnReadTimestamps[1].Less(txnReadTimestamps[2]))
require.True(t, txnReadTimestamps[2].Equal(txnReadTimestamps[3]))
}

// TestReadCommittedVolatileUDF verifies that volatile UDFs running under
// READ COMMITTED do not have their external read timestamp incremented more
// than they should be.
Expand Down

0 comments on commit 557264e

Please sign in to comment.