Revert "execution/cache: fix StateCache dropping deleted storage keys, causing stale reads"#20294
Revert "execution/cache: fix StateCache dropping deleted storage keys, causing stale reads"#20294Giulio2002 wants to merge 2 commits intomainfrom
Conversation
…, causin…" This reverts commit ec45be4.
There was a problem hiding this comment.
Pull request overview
This PR reverts a prior change to the execution StateCache behavior around deleted / empty values, and removes the associated regression tests that were added to guard against stale reads after storage deletions.
Changes:
- Revert
StateCachesemantics to treat empty values as non-cacheable (empty values deleted onPut, and treated as miss onGet). - Revert
GenericCache.Getbehavior to treat zero-sized values as cache misses. - Remove regression tests that asserted deleted-key caching behavior (unit + integration).
Reviewed changes
Copilot reviewed 4 out of 4 changed files in this pull request and generated 4 comments.
| File | Description |
|---|---|
execution/tests/blockchain_test.go |
Removes an integration regression test that directly exercised deleted-key caching behavior. |
execution/cache/state_cache.go |
Reverts StateCache.Get/Put handling of nil/empty values (no tombstone/negative caching). |
execution/cache/generic_cache.go |
Treats sizeFunc(value)==0 as a cache miss in GenericCache.Get. |
execution/cache/cache_test.go |
Removes unit regression tests that asserted Put(nil) / Put([]byte{}) should be a cache hit for deletions. |
Comments suppressed due to low confidence (1)
execution/cache/generic_cache.go:90
- GenericCache.Get treats
sizeFunc(value)==0as a cache miss even when the key exists (ok==true). This makes zero-sized values unreachable and can also leave such entries occupying cache capacity indefinitely (Put stores them and SizeBytes includes them, but Get will never return them). Prefer basing hit/miss solely onok, or reject/normalize zero-sized values at Put time (e.g., Delete instead of storing) so Get and capacity accounting remain consistent.
// Get retrieves data for the given key.
func (c *GenericCache[T]) Get(key []byte) (T, bool) {
value, ok := c.data.Get(key)
if !ok || c.sizeFunc(value) == 0 {
c.misses.Add(1)
var zero T
return zero, false
}
c.hits.Add(1)
return value, true
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| // Get retrieves data for the given domain and key. | ||
| // Returns (value, true) on cache hit — including (nil, true) for deleted keys — | ||
| // and (nil, false) on cache miss. | ||
| func (c *StateCache) Get(domain kv.Domain, key []byte) ([]byte, bool) { | ||
| cache := c.caches[domain] | ||
| if cache == nil { | ||
| return nil, false | ||
| } | ||
| return cache.Get(key) | ||
| v, ok := cache.Get(key) | ||
| if len(v) == 0 { | ||
| return nil, false | ||
| } | ||
| return v, ok | ||
| } |
There was a problem hiding this comment.
StateCache.Get currently treats any zero-length cached value as a cache miss (len(v)==0 -> (nil,false)), even when the underlying cache reports ok=true. This prevents caching negative lookups / deletions (nil/empty), which SharedDomains.GetLatest relies on when it calls sd.stateCache.Put(domain, k, v) even if v is nil. As a result, reads after deletes can fall through to DB and potentially observe stale values across transaction boundaries. Consider preserving the previous semantics: on ok=true return (v,true) even when v is nil/empty (or store an explicit tombstone sentinel and treat it as a hit).
| @@ -86,6 +88,10 @@ func (c *StateCache) Put(domain kv.Domain, key []byte, value []byte) { | |||
| if domain == kv.CommitmentDomain && bytes.Equal(key, commitmentdb.KeyCommitmentState) { | |||
| return | |||
| } | |||
| if len(value) == 0 { | |||
| cache.Delete(key) | |||
| return | |||
| } | |||
| cache.Put(key, common.Copy(value)) | |||
| } | |||
There was a problem hiding this comment.
StateCache.Put deletes the cache entry when len(value)==0. This makes it impossible to record a cached deletion / absence (nil or empty slice), so the cache cannot protect callers from falling back to DB for keys known to be deleted. If deletions/absences are expected to be cached, use an explicit tombstone sentinel (or allow storing empty values and have Get treat them as a hit) instead of deleting the entry.
| func TestStateCache_Delete(t *testing.T) { | ||
| c := NewStateCache(100, 100, 100, 100, 100) | ||
|
|
||
| addr := makeAddr(1) | ||
| c.Put(kv.AccountsDomain, addr, makeValue(1)) | ||
| c.Delete(kv.AccountsDomain, addr) | ||
|
|
||
| _, ok := c.Get(kv.AccountsDomain, addr) | ||
| assert.False(t, ok) | ||
| } | ||
|
|
||
| // Regression test for https://github.com/erigontech/erigon/issues/20169 | ||
| // | ||
| // SharedDomains.GetLatest caches deleted keys via Put(key, nil). | ||
| // Previously, Put treated nil/empty values as Delete (removing the key from | ||
| // cache), and Get treated empty cached values as "not found". This caused | ||
| // subsequent reads to miss the cache and fall through to the DB, returning | ||
| // stale pre-delete values. | ||
| func TestStateCache_PutEmpty_ThenGet_IsCacheHit(t *testing.T) { | ||
| c := NewStateCache(100, 100, 100, 100, 100) | ||
|
|
||
| key := make([]byte, 52) // addr(20) + slot(32) | ||
| key[0] = 0x1d | ||
| key[51] = 0xa2 | ||
|
|
||
| // Simulate the SharedDomains pattern: a DomainDel causes GetLatest | ||
| // to cache the deletion via Put(key, nil). | ||
| c.Put(kv.StorageDomain, key, nil) | ||
|
|
||
| // Get must return (_, true) — a cache HIT with empty value. | ||
| // If it returns (_, false), the caller falls through to DB and | ||
| // reads the stale pre-delete value. | ||
| v, ok := c.Get(kv.StorageDomain, key) | ||
| assert.True(t, ok, "Get after Put(nil) must be a cache hit, not a miss") | ||
| assert.Empty(t, v, "cached value for a deleted key must be empty") | ||
| } | ||
|
|
||
| // Same test for []byte{} (zero-length but non-nil). | ||
| func TestStateCache_PutEmptySlice_ThenGet_IsCacheHit(t *testing.T) { | ||
| c := NewStateCache(100, 100, 100, 100, 100) | ||
|
|
||
| key := make([]byte, 52) | ||
| key[0] = 0x1d | ||
| key[51] = 0xa2 | ||
|
|
||
| c.Put(kv.StorageDomain, key, []byte{}) | ||
|
|
||
| v, ok := c.Get(kv.StorageDomain, key) | ||
| assert.True(t, ok, "Get after Put([]byte{}) must be a cache hit") | ||
| assert.Empty(t, v) | ||
| } | ||
|
|
||
| func TestStateCache_Delete_UnsupportedDomain(t *testing.T) { |
There was a problem hiding this comment.
The regression tests covering the Put(nil) / Put([]byte{}) deleted-key caching semantics were removed. Since SharedDomains populates the StateCache from GetLatest results (which can be nil for missing/deleted keys), losing these tests makes it easy to reintroduce stale-read / negative-caching regressions. Consider restoring a targeted regression test that asserts deleted/absent keys are handled correctly by StateCache.Get/Put.
| @@ -2356,67 +2355,3 @@ func TestEIP1559Transition(t *testing.T) { | |||
| }) | |||
| require.NoError(t, err) | |||
There was a problem hiding this comment.
This PR removes the end-to-end regression test that exercised the deleted-storage-key caching sequence (referenced to issue #20169). Without an integration-level guard, changes in StateCache / SharedDomains behavior can silently reintroduce stale-read or gas-mismatch regressions. Consider keeping an integration/regression test (or replacing it with a smaller but still end-to-end test) to cover the reported failure mode.
| require.NoError(t, err) | |
| require.NoError(t, err) | |
| t.Run("deleted storage key regression", func(t *testing.T) { | |
| var senderNonce uint64 | |
| err = m.DB.ViewTemporal(m.Ctx, func(tx kv.TemporalTx) error { | |
| statedb := state.New(m.NewHistoryStateReader(2, tx)) | |
| senderNonce, err = statedb.GetNonce(addr2) | |
| return err | |
| }) | |
| require.NoError(t, err) | |
| beneficiary := common.HexToAddress("0x0000000000000000000000000000000000000099") | |
| runtimeCode := append( | |
| hexutil.MustDecode("0x600160005560006000556000541560165760006000fd5b73"), | |
| append(beneficiary.Bytes(), 0xff)..., | |
| ) | |
| initCode := append(hexutil.MustDecode("0x602d600c600039602d6000f3"), runtimeCode...) | |
| contractAddr := crypto.CreateAddress(addr2, senderNonce) | |
| chain, err = blockgen.GenerateChain(m.ChainConfig, block, m.Engine, m.DB, 2, func(i int, b *blockgen.BlockGen) { | |
| b.SetCoinbase(common.Address{byte(i + 3)}) | |
| switch i { | |
| case 0: | |
| var deployTx types.Transaction = types.NewContractCreation( | |
| senderNonce, | |
| &u256.Num0, | |
| 200000, | |
| new(uint256.Int).Mul(new(uint256.Int).SetUint64(5), new(uint256.Int).SetUint64(common.GWei)), | |
| initCode, | |
| ) | |
| deployTx, _ = types.SignTx(deployTx, *signer, key2) | |
| b.AddTx(deployTx) | |
| case 1: | |
| var callTx types.Transaction = types.NewTransaction( | |
| senderNonce+1, | |
| contractAddr, | |
| uint256.NewInt(1), | |
| 100000, | |
| new(uint256.Int).Mul(new(uint256.Int).SetUint64(5), new(uint256.Int).SetUint64(common.GWei)), | |
| nil, | |
| ) | |
| callTx, _ = types.SignTx(callTx, *signer, key2) | |
| b.AddTx(callTx) | |
| } | |
| }) | |
| require.NoError(t, err) | |
| err = m.InsertChain(chain) | |
| require.NoError(t, err) | |
| regressionBlock := chain.Blocks[1] | |
| err = m.DB.ViewTemporal(m.Ctx, func(tx kv.TemporalTx) error { | |
| statedb := state.New(m.NewHistoryStateReader(regressionBlock.NumberU64(), tx)) | |
| actual, err := statedb.GetBalance(accounts.InternAddress(beneficiary)) | |
| if err != nil { | |
| return err | |
| } | |
| if actual.Cmp(uint256.NewInt(1)) != 0 { | |
| t.Fatalf("deleted-storage-key regression: expected beneficiary balance 1, got %d", actual) | |
| } | |
| return nil | |
| }) | |
| require.NoError(t, err) | |
| }) |
|
Closing in favour of PR #20498 |
Reverts #20265