From c03b5543680cffe4e33f2e71b559de62a3127747 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Tue, 23 Oct 2018 08:34:50 -0400 Subject: [PATCH] kv: avoid transaction too large error if already refreshed This fixes a bug where a txn that already refreshed its spans and no longer needed them could still run into a "transaction is too large to complete" error. This will be backported to 2.0 and 2.1. Release note (bug fix): Fix bug where transactions unnecessarily hit "too large" error. --- pkg/kv/dist_sender_server_test.go | 26 ++++++++++++++++++++++++ pkg/kv/txn_interceptor_span_refresher.go | 10 ++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/pkg/kv/dist_sender_server_test.go b/pkg/kv/dist_sender_server_test.go index 57375150e5c4..ef398ea186d4 100644 --- a/pkg/kv/dist_sender_server_test.go +++ b/pkg/kv/dist_sender_server_test.go @@ -2078,6 +2078,32 @@ func TestTxnCoordSenderRetries(t *testing.T) { }, expFailure: "transaction is too large to complete; try splitting into pieces", }, + { + // If we've exhausted the limit for tracking refresh spans but we + // already refreshed, keep running the txn. + name: "forwarded timestamp with too many refreshes, read only", + afterTxnStart: func(ctx context.Context, db *client.DB) error { + return db.Put(ctx, "a", "value") + }, + retryable: func(ctx context.Context, txn *client.Txn) error { + // Make the batch large enough such that when we accounted for + // all of its spans then we exceed the limit on refresh spans. + // This is not an issue because we refresh before tracking their + // spans. + keybase := strings.Repeat("a", 1024) + maxRefreshBytes := kv.MaxTxnRefreshSpansBytes.Get(&s.ClusterSettings().SV) + scanToExceed := int(maxRefreshBytes) / len(keybase) + b := txn.NewBatch() + // Hit the uncertainty error at the beginning of the batch. + b.Get("a") + for i := 0; i < scanToExceed; i++ { + key := roachpb.Key(fmt.Sprintf("%s%10d", keybase, i)) + b.Scan(key, key.Next()) + } + return txn.Run(ctx, b) + }, + filter: newUncertaintyFilter(roachpb.Key([]byte("a"))), + }, { // Even if accounting for the refresh spans would have exhausted the // limit for tracking refresh spans and our transaction's timestamp diff --git a/pkg/kv/txn_interceptor_span_refresher.go b/pkg/kv/txn_interceptor_span_refresher.go index ce55e0015254..ce9143a68891 100644 --- a/pkg/kv/txn_interceptor_span_refresher.go +++ b/pkg/kv/txn_interceptor_span_refresher.go @@ -141,7 +141,15 @@ func (sr *txnSpanRefresher) SendLocked( // exhausted, return a non-retryable error indicating that the // transaction is too large and should potentially be split. // We do this to avoid endlessly retrying a txn likely refail. - if sr.refreshInvalid && (br.Txn.OrigTimestamp != br.Txn.Timestamp) { + // + // TODO(nvanbenschoten): this is duplicating some of the logic + // in IsSerializablePushAndRefreshNotPossible. We plan to remove + // this all shortly (#30074), but if that changes, we should clean + // this overlap up. + ts := br.Txn.OrigTimestamp + ts.Forward(br.Txn.RefreshedTimestamp) + pushed := ts != br.Txn.Timestamp + if pushed && sr.refreshInvalid { return nil, roachpb.NewErrorWithTxn( errors.New("transaction is too large to complete; try splitting into pieces"), br.Txn, )