Skip to content

Commit

Permalink
kv: add server-side metrics for {successful,failed} 1PC evaluation
Browse files Browse the repository at this point in the history
This patch adds metrics for successful and failed 1PC evaluations, which
together give us the number of attempted 1PC evaluations by a store. We
then modify an existing test to use these metrics to verify that it was
able to successfully commit using 1PC.

Epic: None
Release note: None
  • Loading branch information
arulajmani committed Nov 8, 2023
1 parent 7a0b7f4 commit 933c4cd
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/generated/metrics/metrics.html
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,8 @@
<tr><td>STORAGE</td><td>tscache.skl.pages</td><td>Number of pages in the timestamp cache</td><td>Pages</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>STORAGE</td><td>tscache.skl.rotations</td><td>Number of page rotations in the timestamp cache</td><td>Page Rotations</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>txn.commit_waits.before_commit_trigger</td><td>Number of KV transactions that had to commit-wait on the server before committing because they had a commit trigger</td><td>KV Transactions</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>txn.server_side.1PC.failure</td><td>Number of batches that attempted to commit using 1PC and failed</td><td>KV Transactions</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>txn.server_side.1PC.success</td><td>Number of batches that attempted to commit using 1PC and succeeded</td><td>KV Transactions</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>txn.server_side_retry.read_evaluation.failure</td><td>Number of read batches that were not successfully refreshed server side</td><td>KV Transactions</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>txn.server_side_retry.read_evaluation.success</td><td>Number of read batches that were successfully refreshed server side</td><td>KV Transactions</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>STORAGE</td><td>txn.server_side_retry.uncertainty_interval_error.failure</td><td>Number of batches that ran into uncertainty interval errors that were not successfully refreshed server side</td><td>KV Transactions</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
Expand Down
16 changes: 16 additions & 0 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,18 @@ var (
Measurement: "KV Transactions",
Unit: metric.Unit_COUNT,
}
// metaOnePhaseCommitSuccess is the metadata for the counter of batches that
// attempted to commit using the one-phase commit (1PC) path and succeeded.
metaOnePhaseCommitSuccess = metric.Metadata{
Name: "txn.server_side.1PC.success",
Help: "Number of batches that attempted to commit using 1PC and succeeded",
Measurement: "KV Transactions",
Unit: metric.Unit_COUNT,
}
// metaOnePhaseCommitFailure is the metadata for the counter of batches that
// attempted to commit using the one-phase commit (1PC) path and failed.
metaOnePhaseCommitFailure = metric.Metadata{
Name: "txn.server_side.1PC.failure",
Help: "Number of batches that attempted to commit using 1PC and failed",
Measurement: "KV Transactions",
Unit: metric.Unit_COUNT,
}

// Ingest metrics
metaIngestCount = metric.Metadata{
Expand Down Expand Up @@ -2357,6 +2369,8 @@ type StoreMetrics struct {
ReadEvaluationServerSideRetryFailure *metric.Counter
ReadWithinUncertaintyIntervalErrorServerSideRetrySuccess *metric.Counter
ReadWithinUncertaintyIntervalErrorServerSideRetryFailure *metric.Counter
OnePhaseCommitSuccess *metric.Counter
OnePhaseCommitFailure *metric.Counter

// Storage (pebble) metrics. Some are named RocksDB which is what we used
// before pebble, and this name is kept for backwards compatibility despite
Expand Down Expand Up @@ -3035,6 +3049,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
ReadEvaluationServerSideRetryFailure: metric.NewCounter(metaReadEvaluationServerSideRetryFailure),
ReadWithinUncertaintyIntervalErrorServerSideRetrySuccess: metric.NewCounter(metaReadWithinUncertaintyIntervalErrorServerSideRetrySuccess),
ReadWithinUncertaintyIntervalErrorServerSideRetryFailure: metric.NewCounter(metaReadWithinUncertaintyIntervalErrorServerSideRetryFailure),
OnePhaseCommitSuccess: metric.NewCounter(metaOnePhaseCommitSuccess),
OnePhaseCommitFailure: metric.NewCounter(metaOnePhaseCommitFailure),

// Pebble metrics.
//
Expand Down
15 changes: 13 additions & 2 deletions pkg/kv/kvserver/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11222,9 +11222,14 @@ func TestReplicaAsyncIntentResolutionOn1PC(t *testing.T) {
Knobs: base.TestingKnobs{Store: &storeKnobs}})
defer s.Stopper().Stop(ctx)

store, err := s.GetStores().(*Stores).GetStore(1)
require.NoError(t, err)
successfulOnePCBefore := store.Metrics().OnePhaseCommitSuccess.Count()
failedOnePCBefore := store.Metrics().OnePhaseCommitFailure.Count()

// Perform a range split between key A and B.
keyA, keyB := roachpb.Key("a"), roachpb.Key("b")
_, _, err := s.SplitRange(keyB)
_, _, err = s.SplitRange(keyB)
require.NoError(t, err)

// Write a value to a key A and B.
Expand All @@ -11248,12 +11253,18 @@ func TestReplicaAsyncIntentResolutionOn1PC(t *testing.T) {
require.NoError(t, err)

// Update the locked value and commit in a single batch. This should hit the
// one-phase commit fast-path and then release the "for update" lock(s).
// one-phase commit fast-path (verified below) and then release the
// "for update" lock(s).
b = txn.NewBatch()
b.Inc(keyA, 1)
err = txn.CommitInBatch(ctx, b)
require.NoError(t, err)

successfulOnePCAfter := store.Metrics().OnePhaseCommitSuccess.Count()
failedOnePCAfter := store.Metrics().OnePhaseCommitFailure.Count()
require.Equal(t, failedOnePCBefore, failedOnePCAfter)
require.Greater(t, successfulOnePCAfter, successfulOnePCBefore)

// If an external lock was acquired, we should see its resolution.
if external {
riReq := <-resIntentC
Expand Down
5 changes: 4 additions & 1 deletion pkg/kv/kvserver/replica_write.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,9 +503,12 @@ func (r *Replica) evaluate1PC(
var batch storage.Batch
defer func() {
// Close the batch unless it's passed to the caller (when the evaluation
// succeeds).
// succeeds). Also increment metrics.
if onePCRes.success != onePCSucceeded {
batch.Close()
r.store.Metrics().OnePhaseCommitFailure.Inc(1)
} else {
r.store.Metrics().OnePhaseCommitSuccess.Inc(1)
}
}()

Expand Down

0 comments on commit 933c4cd

Please sign in to comment.