diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html
index 372707cf821b..09fa9e685259 100644
--- a/docs/generated/metrics/metrics.html
+++ b/docs/generated/metrics/metrics.html
@@ -703,6 +703,8 @@
STORAGE | tscache.skl.pages | Number of pages in the timestamp cache | Pages | GAUGE | COUNT | AVG | NONE |
STORAGE | tscache.skl.rotations | Number of page rotations in the timestamp cache | Page Rotations | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
STORAGE | txn.commit_waits.before_commit_trigger | Number of KV transactions that had to commit-wait on the server before committing because they had a commit trigger | KV Transactions | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+STORAGE | txn.server_side.1PC.failure | Number of batches that attempted to commit using 1PC and failed | KV Transactions | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+STORAGE | txn.server_side.1PC.success | Number of batches that attempted to commit using 1PC and succeeded | KV Transactions | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
STORAGE | txn.server_side_retry.read_evaluation.failure | Number of read batches that were not successfully refreshed server side | KV Transactions | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
STORAGE | txn.server_side_retry.read_evaluation.success | Number of read batches that were successfully refreshed server side | KV Transactions | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
STORAGE | txn.server_side_retry.uncertainty_interval_error.failure | Number of batches that ran into uncertainty interval errors that were not successfully refreshed server side | KV Transactions | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go
index e1dd41cf83a5..cca2f6b2e20f 100644
--- a/pkg/kv/kvserver/metrics.go
+++ b/pkg/kv/kvserver/metrics.go
@@ -474,6 +474,18 @@ var (
Measurement: "KV Transactions",
Unit: metric.Unit_COUNT,
}
+ metaOnePhaseCommitSuccess = metric.Metadata{
+ Name: "txn.server_side.1PC.success",
+ Help: "Number of batches that attempted to commit using 1PC and succeeded",
+ Measurement: "KV Transactions",
+ Unit: metric.Unit_COUNT,
+ }
+ metaOnePhaseCommitFailure = metric.Metadata{
+ Name: "txn.server_side.1PC.failure",
+ Help: "Number of batches that attempted to commit using 1PC and failed",
+ Measurement: "KV Transactions",
+ Unit: metric.Unit_COUNT,
+ }
//Ingest metrics
metaIngestCount = metric.Metadata{
@@ -2357,6 +2369,8 @@ type StoreMetrics struct {
ReadEvaluationServerSideRetryFailure *metric.Counter
ReadWithinUncertaintyIntervalErrorServerSideRetrySuccess *metric.Counter
ReadWithinUncertaintyIntervalErrorServerSideRetryFailure *metric.Counter
+ OnePhaseCommitSuccess *metric.Counter
+ OnePhaseCommitFailure *metric.Counter
// Storage (pebble) metrics. Some are named RocksDB which is what we used
// before pebble, and this name is kept for backwards compatibility despite
@@ -3035,6 +3049,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
ReadEvaluationServerSideRetryFailure: metric.NewCounter(metaReadEvaluationServerSideRetryFailure),
ReadWithinUncertaintyIntervalErrorServerSideRetrySuccess: metric.NewCounter(metaReadWithinUncertaintyIntervalErrorServerSideRetrySuccess),
ReadWithinUncertaintyIntervalErrorServerSideRetryFailure: metric.NewCounter(metaReadWithinUncertaintyIntervalErrorServerSideRetryFailure),
+ OnePhaseCommitSuccess: metric.NewCounter(metaOnePhaseCommitSuccess),
+ OnePhaseCommitFailure: metric.NewCounter(metaOnePhaseCommitFailure),
// Pebble metrics.
//
diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go
index 564bbd121e4a..31090f854361 100644
--- a/pkg/kv/kvserver/replica_test.go
+++ b/pkg/kv/kvserver/replica_test.go
@@ -11222,9 +11222,14 @@ func TestReplicaAsyncIntentResolutionOn1PC(t *testing.T) {
Knobs: base.TestingKnobs{Store: &storeKnobs}})
defer s.Stopper().Stop(ctx)
+ store, err := s.GetStores().(*Stores).GetStore(1)
+ require.NoError(t, err)
+ successfulOnePCBefore := store.Metrics().OnePhaseCommitSuccess.Count()
+ failedOnePCBefore := store.Metrics().OnePhaseCommitFailure.Count()
+
// Perform a range split between key A and B.
keyA, keyB := roachpb.Key("a"), roachpb.Key("b")
- _, _, err := s.SplitRange(keyB)
+ _, _, err = s.SplitRange(keyB)
require.NoError(t, err)
// Write a value to a key A and B.
@@ -11248,12 +11253,18 @@ func TestReplicaAsyncIntentResolutionOn1PC(t *testing.T) {
require.NoError(t, err)
// Update the locked value and commit in a single batch. This should hit the
- // one-phase commit fast-path and then release the "for update" lock(s).
+ // one-phase commit fast-path (verified below) and then release the
+ // "for update" lock(s).
b = txn.NewBatch()
b.Inc(keyA, 1)
err = txn.CommitInBatch(ctx, b)
require.NoError(t, err)
+ successfulOnePCAfter := store.Metrics().OnePhaseCommitSuccess.Count()
+ failedOnePCAfter := store.Metrics().OnePhaseCommitFailure.Count()
+ require.Equal(t, failedOnePCBefore, failedOnePCAfter)
+ require.Greater(t, successfulOnePCAfter, successfulOnePCBefore)
+
// If an external lock was acquired, we should see its resolution.
if external {
riReq := <-resIntentC
diff --git a/pkg/kv/kvserver/replica_write.go b/pkg/kv/kvserver/replica_write.go
index f9d0e61bdf67..b67697a1c813 100644
--- a/pkg/kv/kvserver/replica_write.go
+++ b/pkg/kv/kvserver/replica_write.go
@@ -503,9 +503,12 @@ func (r *Replica) evaluate1PC(
var batch storage.Batch
defer func() {
// Close the batch unless it's passed to the caller (when the evaluation
- // succeeds).
+ // succeeds). Also increment metrics.
if onePCRes.success != onePCSucceeded {
batch.Close()
+ r.store.Metrics().OnePhaseCommitFailure.Inc(1)
+ } else {
+ r.store.Metrics().OnePhaseCommitSuccess.Inc(1)
}
}()