From bf596c26f324496edf3276adcc9a8609772a43be Mon Sep 17 00:00:00 2001 From: Alex Sharov Date: Sat, 15 Aug 2020 14:11:40 +0700 Subject: [PATCH] [merge after release] dupsort of plain state (#913) * dupsort of plain state * rebase master --- cmd/hack/hack.go | 353 +----------------- common/dbutils/bucket.go | 41 +- common/debug/experiments.go | 12 - ethdb/kv_abstract_test.go | 8 +- ethdb/kv_lmdb.go | 22 +- ...dupsort_hash_state.go => dupsort_state.go} | 37 +- ...sh_state_test.go => dupsort_state_test.go} | 66 +++- migrations/migrations.go | 3 +- 8 files changed, 150 insertions(+), 392 deletions(-) rename migrations/{dupsort_hash_state.go => dupsort_state.go} (52%) rename migrations/{dupsort_hash_state_test.go => dupsort_state_test.go} (50%) diff --git a/cmd/hack/hack.go b/cmd/hack/hack.go index 17c462fa0ad..55a0987beff 100644 --- a/cmd/hack/hack.go +++ b/cmd/hack/hack.go @@ -59,7 +59,6 @@ var name = flag.String("name", "", "name to add to the file names") var chaindata = flag.String("chaindata", "chaindata", "path to the chaindata database file") var bucket = flag.String("bucket", "", "bucket in the database") var hash = flag.String("hash", "0x00", "image for preimage or state root for testBlockHashes action") -var preImage = flag.String("preimage", "0x00", "preimage") func check(e error) { if e != nil { @@ -313,7 +312,7 @@ func mychart() { func accountSavings(db *bolt.DB) (int, int) { emptyRoots := 0 emptyCodes := 0 - db.View(func(tx *bolt.Tx) error { + check(db.View(func(tx *bolt.Tx) error { b := tx.Bucket([]byte(dbutils.CurrentStateBucket)) c := b.Cursor() for k, v := c.First(); k != nil; k, v = c.Next() { @@ -328,16 +327,10 @@ func accountSavings(db *bolt.DB) (int, int) { } } return nil - }) + })) return emptyRoots, emptyCodes } -func printBuckets(db *bolt.DB) { - for _, bucket := range dbutils.Buckets { - fmt.Printf("%s\n", bucket) - } -} - func bucketStats(chaindata string) { t := ethdb.Lmdb bucketList := dbutils.Buckets @@ -994,13 +987,6 @@ func preimage(chaindata string, image common.Hash) { fmt.Printf("%x\n", p) } -func addPreimage(chaindata string, image common.Hash, preimage []byte) { - ethDb := ethdb.MustOpen(chaindata) - defer ethDb.Close() - err := ethDb.Put(dbutils.PreimagePrefix, image[:], preimage) - check(err) -} - func printBranches(block uint64) { //ethDb := ethdb.MustOpen("/home/akhounov/.ethereum/geth/chaindata") ethDb := ethdb.MustOpen(node.DefaultDataDir() + "/testnet/geth/chaindata") @@ -1250,41 +1236,6 @@ func getModifiedAccounts(chaindata string) { fmt.Printf("Len(addrs)=%d\n", len(addrs)) } -func resetState(chaindata string) { - db := ethdb.MustOpen(chaindata) - defer db.Close() - check(db.ClearBuckets( - dbutils.CurrentStateBucket, - dbutils.AccountChangeSetBucket, - dbutils.StorageChangeSetBucket, - dbutils.ContractCodeBucket, - dbutils.PlainStateBucket, - dbutils.PlainAccountChangeSetBucket, - dbutils.PlainStorageChangeSetBucket, - dbutils.PlainContractCodeBucket, - dbutils.IncarnationMapBucket, - dbutils.CodeBucket, - )) - core.UsePlainStateExecution = true - _, _, err := core.DefaultGenesisBlock().CommitGenesisState(db, false) - check(err) - err = stages.SaveStageProgress(db, stages.Execution, 0, nil) - check(err) - fmt.Printf("Reset state done\n") -} - -func resetHashedState(chaindata string) { - db := ethdb.MustOpen(chaindata) - defer db.Close() - check(db.ClearBuckets( - dbutils.IntermediateTrieHashBucket, - dbutils.IntermediateTrieHashBucket, - )) - err := stages.SaveStageProgress(db, stages.HashState, 0, nil) - check(err) - fmt.Printf("Reset hashed state done\n") -} - type Receiver struct { defaultReceiver *trie.DefaultReceiver accountMap map[string]*accounts.Account @@ -1538,285 +1489,6 @@ func testGetProof(chaindata string, address common.Address, rewind int, regen bo return nil } -func findPreimage(chaindata string, hash common.Hash) error { - db := ethdb.MustOpen(chaindata) - defer db.Close() - if err := db.KV().View(context.Background(), func(tx ethdb.Tx) error { - c := tx.Cursor(dbutils.PlainStateBucket) - k, _, e := c.First() - if e != nil { - return e - } - for k != nil { - if bytes.Equal(crypto.Keccak256(k), hash[:]) { - fmt.Printf("preimage: %x\n", k) - break - } - k, _, e = c.Next() - if e != nil { - return e - } - } - return nil - }); err != nil { - return err - } - return nil -} - -/* -CurrentStateBucket layout which utilises DupSort feature of LMDB (store multiple values inside 1 key). -LMDB stores multiple values of 1 key in B+ tree (as keys which have no values). -It allows do Seek by values and other features of LMDB. -You can think about it as Sub-Database which doesn't store any values. -------------------------------------------------------------- - key | value -------------------------------------------------------------- -[acc_hash] | [acc_value] -[acc_hash]+[inc] | [storage1_hash]+[storage1_value] - | [storage2_hash]+[storage2_value] // this value has no own key. it's 2nd value of [acc_hash]+[inc] key. - | [storage3_hash]+[storage3_value] - | ... -[acc_hash]+[old_inc] | [storage1_hash]+[storage1_value] - | ... -[acc2_hash] | [acc2_value] - ... - -On 5M block CurrentState bucket using 5.9Gb. This layout using 2.3Gb. See dupSortState() - -As further evolution of this idea: -- Can add CodeHash as 2-nd value of [acc_hash] key (just use 1st byte to store type of value) -- And drop ContractCodeBucket -*/ -func dupSortState(chaindata string) { - db := ethdb.MustOpen(chaindata) - defer db.Close() - - dbFile := "statedb_dupsort" - dbFile2 := "statedb_no_dupsort" - - err := os.MkdirAll(dbFile, 0744) - check(err) - env, err := lmdb.NewEnv() - check(err) - err = env.SetMaxDBs(100) - check(err) - err = env.SetMapSize(32 << 40) // 32TB - check(err) - - var flags uint = lmdb.NoReadahead - err = env.Open(dbFile, flags, 0664) - check(err) - defer env.Close() - - err = os.MkdirAll(dbFile2, 0744) - check(err) - env2, err := lmdb.NewEnv() - check(err) - err = env2.SetMaxDBs(100) - check(err) - err = env2.SetMapSize(32 << 40) // 32TB - check(err) - - var flags2 uint = lmdb.NoReadahead - err = env2.Open(dbFile2, flags2, 0664) - check(err) - defer env.Close() - - var newStateBucket lmdb.DBI - err = env.Update(func(tx *lmdb.Txn) error { - var createErr error - newStateBucket, createErr = tx.OpenDBI(string(dbutils.CurrentStateBucket), lmdb.Create|lmdb.DupSort) - check(createErr) - err = tx.Drop(newStateBucket, false) - check(err) - return nil - }) - check(err) - - var newStateBucket2 lmdb.DBI - err = env2.Update(func(tx *lmdb.Txn) error { - var createErr error - newStateBucket2, createErr = tx.OpenDBI(string(dbutils.CurrentStateBucket), lmdb.Create) - check(createErr) - err = tx.Drop(newStateBucket2, false) - check(err) - return nil - }) - check(err) - - err = db.KV().View(context.Background(), func(tx ethdb.Tx) error { - c := tx.Cursor(dbutils.CurrentStateBucket) - txn, _ := env.BeginTxn(nil, 0) - txn2, _ := env2.BeginTxn(nil, 0) - - i := 0 - for k, v, err1 := c.First(); k != nil; k, v, err = c.Next() { - check(err1) - i++ - if i%1_000_000 == 0 { - err = txn.Commit() - check(err) - txn, _ = env.BeginTxn(nil, 0) - err = txn2.Commit() - check(err) - txn2, _ = env2.BeginTxn(nil, 0) - fmt.Printf("%x\n", k[:2]) - } - - err = txn2.Put(newStateBucket2, common.CopyBytes(k), common.CopyBytes(v), 0) - check(err) - - if len(k) == common.HashLength { - err = txn.Put(newStateBucket, common.CopyBytes(k), common.CopyBytes(v), lmdb.AppendDup) - check(err) - } else { - prefix := k[:common.HashLength+common.IncarnationLength] - suffix := k[common.HashLength+common.IncarnationLength:] - err = txn.Put(newStateBucket, common.CopyBytes(prefix), append(suffix, v...), lmdb.AppendDup) - check(err) - } - } - err = txn.Commit() - check(err) - err = txn2.Commit() - check(err) - - return nil - }) - check(err) - - err = env2.View(func(txn *lmdb.Txn) (err error) { - st, err := txn.Stat(newStateBucket2) - check(err) - fmt.Printf("Current bucket size: %s\n", common.StorageSize((st.LeafPages+st.BranchPages+st.OverflowPages)*uint64(os.Getpagesize()))) - return nil - }) - check(err) - - err = env.View(func(txn *lmdb.Txn) (err error) { - st, err := txn.Stat(newStateBucket) - check(err) - fmt.Printf("New bucket size: %s\n", common.StorageSize((st.LeafPages+st.BranchPages+st.OverflowPages)*uint64(os.Getpagesize()))) - return nil - }) - check(err) - - err = env.View(func(txn *lmdb.Txn) (err error) { - - cur, err := txn.OpenCursor(newStateBucket) - check(err) - i := 0 - for k, v, err := cur.Get(nil, nil, lmdb.First); !lmdb.IsNotFound(err); k, v, err = cur.Get(nil, nil, lmdb.Next) { - check(err) - i++ - if i == 100 { // print first 100 - return nil - } - if len(k) == 32 { - fmt.Printf("Acc: %x, %x\n", k, v) - continue - } - - // try to seek on incarnation=2 and check that no incarnation=1 are visible - for _, v, err := cur.Get(nil, nil, lmdb.FirstDup); !lmdb.IsNotFound(err); _, v, err = cur.Get(nil, nil, lmdb.NextDup) { - fmt.Printf("Storage: %x, %x\n", k, v) - } - } - return nil - }) - check(err) -} - -func deleteLargeDupSortKey() { - dbFile := "statedb_dupsort_delete" - err := os.MkdirAll(dbFile, 0744) - check(err) - env, err := lmdb.NewEnv() - check(err) - err = env.SetMaxDBs(100) - check(err) - err = env.SetMapSize(32 << 40) // 32TB - check(err) - - var flags uint = lmdb.NoReadahead - err = env.Open(dbFile, flags, 0664) - check(err) - defer env.Close() - - var newStateBucket lmdb.DBI - err = env.Update(func(tx *lmdb.Txn) error { - var createErr error - newStateBucket, createErr = tx.OpenDBI(string(dbutils.CurrentStateBucket), lmdb.Create|lmdb.DupSort) - check(createErr) - return nil - }) - check(err) - - k := common.FromHex("000034c2eb874b6125c8a84e69fe77640d468ccef4c02a2d20c284446dbb1460") - - txn, _ := env.BeginTxn(nil, 0) - err = txn.Drop(newStateBucket, false) - check(err) - - for i := uint64(0); i < 1_000_000_000; i++ { - if i%10_000_000 == 0 { - err = txn.Commit() - check(err) - txn, err = env.BeginTxn(nil, 0) - check(err) - fmt.Printf("%dM\n", i/1_000_000) - } - - v := make([]byte, 8) - binary.BigEndian.PutUint64(v, i) - err = txn.Put(newStateBucket, k, v, lmdb.AppendDup) - check(err) - } - err = txn.Commit() - check(err) - - //print new bucket size - err = env.View(func(txn *lmdb.Txn) (err error) { - st, err := txn.Stat(newStateBucket) - check(err) - fmt.Printf("Current bucket size: %s\n", common.StorageSize((st.LeafPages+st.BranchPages+st.OverflowPages)*uint64(os.Getpagesize()))) - return nil - }) - check(err) - - // delete single DupSort key - t := time.Now() - err = env.Update(func(txn *lmdb.Txn) (err error) { - cur, err := txn.OpenCursor(newStateBucket) - check(err) - k, _, err = cur.Get(k, nil, lmdb.SetRange) - check(err) - err = cur.Del(lmdb.NoDupData) - check(err) - return nil - }) - check(err) - fmt.Printf("Deletion took: %s\n", time.Since(t)) - - // check that no keys are left in the bucket - err = env.View(func(txn *lmdb.Txn) (err error) { - st, err := txn.Stat(newStateBucket) - check(err) - fmt.Printf("Current bucket size: %s\n", common.StorageSize((st.LeafPages+st.BranchPages+st.OverflowPages)*uint64(os.Getpagesize()))) - - cur, err := txn.OpenCursor(newStateBucket) - check(err) - for k, v, err := cur.Get(nil, nil, lmdb.First); !lmdb.IsNotFound(err); k, v, err = cur.Get(nil, nil, lmdb.Next) { - check(err) - fmt.Printf("Still see some key: %x, %x\n", k, v) - panic(1) - } - return nil - }) - check(err) -} - func fixStages(chaindata string) error { db := ethdb.MustOpen(chaindata) defer db.Close() @@ -2040,7 +1712,6 @@ func main() { //printBuckets(db) //printTxHashes() //relayoutKeys() - //testRedis() //upgradeBlocks() //compareTries() if *action == "invTree" { @@ -2048,14 +1719,6 @@ func main() { } //invTree("iw", "ir", "id", *block, true) //loadAccount() - if *action == "preimage" { - if err := findPreimage(*chaindata, common.HexToHash(*hash)); err != nil { - fmt.Printf("Error: %v\n", err) - } - } - if *action == "addPreimage" { - addPreimage(*chaindata, common.HexToHash(*hash), common.FromHex(*preImage)) - } //printBranches(uint64(*block)) if *action == "execToBlock" { execToBlock(*chaindata, uint64(*block), false) @@ -2096,12 +1759,6 @@ func main() { if *action == "slice" { dbSlice(*chaindata, *bucket, common.FromHex(*hash)) } - if *action == "resetState" { - resetState(*chaindata) - } - if *action == "resetHashed" { - resetHashedState(*chaindata) - } if *action == "getProof" { if err := testGetProof(*chaindata, common.HexToAddress(*account), *rewind, false); err != nil { fmt.Printf("Error: %v\n", err) @@ -2127,12 +1784,6 @@ func main() { fmt.Printf("Error: %v\n", err) } } - if *action == "deleteLargeDupSortKey" { - deleteLargeDupSortKey() - } - if *action == "dupSortState" { - dupSortState(*chaindata) - } if *action == "changeSetStats" { if err := changeSetStats(*chaindata, uint64(*block), uint64(*block)+uint64(*rewind)); err != nil { fmt.Printf("Error: %v\n", err) diff --git a/common/dbutils/bucket.go b/common/dbutils/bucket.go index 12c9ad13201..699bd0f8a8c 100644 --- a/common/dbutils/bucket.go +++ b/common/dbutils/bucket.go @@ -4,7 +4,6 @@ import ( "sort" "strings" - "github.com/ledgerwatch/turbo-geth/common/debug" "github.com/ledgerwatch/turbo-geth/metrics" ) @@ -12,13 +11,32 @@ import ( var ( // "Plain State". The same as CurrentStateBucket, but the keys arent' hashed. - // Contains Accounts: - // key - address (unhashed) - // value - account encoded for storage - // Contains Storage: - // key - address (unhashed) + incarnation + storage key (unhashed) - // value - storage value(common.hash) - PlainStateBucket = "PLAIN-CST" + /* + Logical layout: + Contains Accounts: + key - address (unhashed) + value - account encoded for storage + Contains Storage: + key - address (unhashed) + incarnation + storage key (unhashed) + value - storage value(common.hash) + + Physical layout: + PlainStateBucket and CurrentStateBucket utilises DupSort feature of LMDB (store multiple values inside 1 key). + ------------------------------------------------------------- + key | value + ------------------------------------------------------------- + [acc_hash] | [acc_value] + [acc_hash]+[inc] | [storage1_hash]+[storage1_value] + | [storage2_hash]+[storage2_value] // this value has no own key. it's 2nd value of [acc_hash]+[inc] key. + | [storage3_hash]+[storage3_value] + | ... + [acc_hash]+[old_inc] | [storage1_hash]+[storage1_value] + | ... + [acc2_hash] | [acc2_value] + ... + */ + PlainStateBucket = "PLAIN-CST2" + PlainStateBucketOld1 = "PLAIN-CST" // "Plain State" //key - address+incarnation @@ -204,13 +222,14 @@ var DeprecatedBuckets = []string{ SyncStageProgressOld1, SyncStageUnwindOld1, CurrentStateBucketOld1, + PlainStateBucketOld1, } var BucketsCfg = map[string]*BucketConfigItem{} type BucketConfigItem struct { ID int - IsDupsort bool + IsDupSort bool DupToLen int DupFromLen int } @@ -232,7 +251,7 @@ var dupSortConfig = []dupSortConfigEntry{ }, { Bucket: PlainStateBucket, - IsDupSort: debug.IsPlainStateDupsortEnabled(), + IsDupSort: true, ToLen: 28, FromLen: 60, }, @@ -262,7 +281,7 @@ func createBucketConfig(id int, name string) *BucketConfigItem { cfg.DupFromLen = dupCfg.FromLen cfg.DupToLen = dupCfg.ToLen - cfg.IsDupsort = dupCfg.IsDupSort + cfg.IsDupSort = dupCfg.IsDupSort } return cfg diff --git a/common/debug/experiments.go b/common/debug/experiments.go index 07c81b04ed4..934819771bb 100644 --- a/common/debug/experiments.go +++ b/common/debug/experiments.go @@ -68,15 +68,3 @@ func TestDB() string { }) return testDB } - -var ( - dupsortPlain bool - getDupsortPlain sync.Once -) - -func IsPlainStateDupsortEnabled() bool { - getDupsortPlain.Do(func() { - _, dupsortPlain = os.LookupEnv("DUPSORT_PLAIN") - }) - return dupsortPlain -} diff --git a/ethdb/kv_abstract_test.go b/ethdb/kv_abstract_test.go index 08015203b08..d86a8b6db10 100644 --- a/ethdb/kv_abstract_test.go +++ b/ethdb/kv_abstract_test.go @@ -28,10 +28,10 @@ func TestManagedTx(t *testing.T) { bucketID := 0 bucket1 := dbutils.Buckets[bucketID] bucket2 := dbutils.Buckets[bucketID+1] - dbutils.BucketsCfg[bucket1].IsDupsort = true + dbutils.BucketsCfg[bucket1].IsDupSort = true dbutils.BucketsCfg[bucket1].DupFromLen = 6 dbutils.BucketsCfg[bucket1].DupToLen = 4 - dbutils.BucketsCfg[bucket2].IsDupsort = false + dbutils.BucketsCfg[bucket2].IsDupSort = false writeDBs, readDBs, closeAll := setupDatabases() defer closeAll() @@ -110,7 +110,9 @@ func setupDatabases() (writeDBs []ethdb.KV, readDBs []ethdb.KV, close func()) { return writeDBs, readDBs, func() { grpcServer.Stop() - conn.Close() + if err := conn.Close(); err != nil { + panic(err) + } for _, db := range readDBs { db.Close() diff --git a/ethdb/kv_lmdb.go b/ethdb/kv_lmdb.go index cae7e1719ab..3d1a713ecff 100644 --- a/ethdb/kv_lmdb.go +++ b/ethdb/kv_lmdb.go @@ -246,7 +246,7 @@ type lmdbTx struct { } type lmdbBucket struct { - isDupsort bool + isDupSort bool dupFrom int dupTo int name string @@ -325,7 +325,7 @@ func (tx *lmdbTx) CreateBucket(name string) error { if !tx.db.opts.readOnly { flags |= lmdb.Create } - if dbutils.BucketsCfg[name].IsDupsort { + if dbutils.BucketsCfg[name].IsDupSort { flags |= lmdb.DupSort } dbi, err := tx.tx.OpenDBI(name, flags) @@ -384,7 +384,7 @@ func (tx *lmdbTx) Bucket(name string) Bucket { panic(fmt.Errorf("%w: %s", ErrUnknownBucket, name)) } - return &lmdbBucket{tx: tx, dbi: tx.db.buckets[name], isDupsort: cfg.IsDupsort, dupFrom: cfg.DupFromLen, dupTo: cfg.DupToLen, name: name} + return &lmdbBucket{tx: tx, dbi: tx.db.buckets[name], isDupSort: cfg.IsDupSort, dupFrom: cfg.DupFromLen, dupTo: cfg.DupToLen, name: name} } func (tx *lmdbTx) Commit(ctx context.Context) error { @@ -458,7 +458,7 @@ func (c *LmdbCursor) NoValues() NoValuesCursor { func (tx *lmdbTx) Get(bucket string, key []byte) ([]byte, error) { dbi := tx.db.buckets[bucket] cfg := dbutils.BucketsCfg[bucket] - if cfg.IsDupsort { + if cfg.IsDupSort { return tx.getDupSort(bucket, dbi, cfg, key) } @@ -503,7 +503,7 @@ func (tx *lmdbTx) getDupSort(bucket string, dbi lmdb.DBI, cfg *dbutils.BucketCon } func (b lmdbBucket) Get(key []byte) ([]byte, error) { - if b.isDupsort { + if b.isDupSort { return b.getDupSort(key) } @@ -620,7 +620,7 @@ func (c *LmdbCursor) Last() ([]byte, []byte, error) { return []byte{}, nil, err } - if c.bucketCfg.IsDupsort { + if c.bucketCfg.IsDupSort { if k == nil { return k, v, nil } @@ -644,7 +644,7 @@ func (c *LmdbCursor) Seek(seek []byte) (k, v []byte, err error) { } } - if c.bucketCfg.IsDupsort { + if c.bucketCfg.IsDupSort { return c.seekDupSort(seek) } @@ -734,7 +734,7 @@ func (c *LmdbCursor) Next() (k, v []byte, err error) { default: } - if c.bucketCfg.IsDupsort { + if c.bucketCfg.IsDupSort { return c.nextDupSort() } @@ -791,7 +791,7 @@ func (c *LmdbCursor) Delete(key []byte) error { } } - if c.bucketCfg.IsDupsort { + if c.bucketCfg.IsDupSort { return c.deleteDupSort(key) } @@ -854,7 +854,7 @@ func (c *LmdbCursor) Put(key []byte, value []byte) error { } } - if c.bucketCfg.IsDupsort { + if c.bucketCfg.IsDupSort { return c.putDupSort(key, value) } @@ -986,7 +986,7 @@ func (c *LmdbCursor) Append(key []byte, value []byte) error { } b := c.bucketCfg from, to := b.DupFromLen, b.DupToLen - if b.IsDupsort { + if b.IsDupSort { if len(key) != from && len(key) >= to { return fmt.Errorf("dupsort bucket: %s, can have keys of len==%d and len<%d. key: %x", c.bucketName, from, to, key) } diff --git a/migrations/dupsort_hash_state.go b/migrations/dupsort_state.go similarity index 52% rename from migrations/dupsort_hash_state.go rename to migrations/dupsort_state.go index 7fc41011289..dd0567ad6a0 100644 --- a/migrations/dupsort_hash_state.go +++ b/migrations/dupsort_state.go @@ -6,7 +6,7 @@ import ( "github.com/ledgerwatch/turbo-geth/ethdb" ) -var dupsortHashState = Migration{ +var dupSortHashState = Migration{ Name: "dupsort_hash_state", Up: func(db ethdb.Database, datadir string, OnLoadCommit etl.LoadCommitHandler) error { if exists, err := db.(ethdb.NonTransactional).BucketExists(dbutils.CurrentStateBucketOld1); err != nil { @@ -40,3 +40,38 @@ var dupsortHashState = Migration{ return nil }, } + +var dupSortPlainState = Migration{ + Name: "dupsort_plain_state", + Up: func(db ethdb.Database, datadir string, OnLoadCommit etl.LoadCommitHandler) error { + if exists, err := db.(ethdb.NonTransactional).BucketExists(dbutils.PlainStateBucketOld1); err != nil { + return err + } else if !exists { + return OnLoadCommit(db, nil, true) + } + + if err := db.(ethdb.NonTransactional).ClearBuckets(dbutils.PlainStateBucket); err != nil { + return err + } + extractFunc := func(k []byte, v []byte, next etl.ExtractNextFunc) error { + return next(k, k, v) + } + + if err := etl.Transform( + db, + dbutils.PlainStateBucketOld1, + dbutils.PlainStateBucket, + datadir, + extractFunc, + etl.IdentityLoadFunc, + etl.TransformArgs{OnLoadCommit: OnLoadCommit}, + ); err != nil { + return err + } + + if err := db.(ethdb.NonTransactional).DropBuckets(dbutils.PlainStateBucketOld1); err != nil { + return err + } + return nil + }, +} diff --git a/migrations/dupsort_hash_state_test.go b/migrations/dupsort_state_test.go similarity index 50% rename from migrations/dupsort_hash_state_test.go rename to migrations/dupsort_state_test.go index 62a3fae6871..f76f3b1a31e 100644 --- a/migrations/dupsort_hash_state_test.go +++ b/migrations/dupsort_state_test.go @@ -12,7 +12,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestDupsortHashState(t *testing.T) { +func TestDupSortHashState(t *testing.T) { require, db := require.New(t), ethdb.NewMemDatabase() err := db.KV().Update(context.Background(), func(tx ethdb.Tx) error { @@ -30,7 +30,7 @@ func TestDupsortHashState(t *testing.T) { require.NoError(err) migrator := NewMigrator() - migrator.Migrations = []Migration{dupsortHashState} + migrator.Migrations = []Migration{dupSortHashState} err = migrator.Apply(db, "") require.NoError(err) @@ -73,3 +73,65 @@ func TestDupsortHashState(t *testing.T) { require.Equal([]byte(storageKey)[keyLen:], v[:common.HashLength]) require.Equal([]byte{2}, v[common.HashLength:]) } + +func TestDupSortPlainState(t *testing.T) { + require, db := require.New(t), ethdb.NewMemDatabase() + + err := db.KV().Update(context.Background(), func(tx ethdb.Tx) error { + return tx.(ethdb.BucketMigrator).CreateBucket(dbutils.PlainStateBucketOld1) + }) + require.NoError(err) + + accKey := string(common.FromHex(fmt.Sprintf("%040x", 0))) + inc := string(common.FromHex("0000000000000001")) + storageKey := accKey + inc + string(common.FromHex(fmt.Sprintf("%064x", 0))) + + err = db.Put(dbutils.PlainStateBucketOld1, []byte(accKey), []byte{1}) + require.NoError(err) + err = db.Put(dbutils.PlainStateBucketOld1, []byte(storageKey), []byte{2}) + require.NoError(err) + + migrator := NewMigrator() + migrator.Migrations = []Migration{dupSortPlainState} + err = migrator.Apply(db, "") + require.NoError(err) + + // test high-level data access didn't change + i := 0 + err = db.Walk(dbutils.PlainStateBucket, nil, 0, func(k, v []byte) (bool, error) { + i++ + return true, nil + }) + require.NoError(err) + require.Equal(2, i) + + v, err := db.Get(dbutils.PlainStateBucket, []byte(accKey)) + require.NoError(err) + require.Equal([]byte{1}, v) + + v, err = db.Get(dbutils.PlainStateBucket, []byte(storageKey)) + require.NoError(err) + require.Equal([]byte{2}, v) + + // test low-level data layout + rawKV := db.KV().(*ethdb.LmdbKV) + env := rawKV.Env() + allDBI := rawKV.AllDBI() + + tx, err := env.BeginTxn(nil, lmdb.Readonly) + require.NoError(err) + c, err := tx.OpenCursor(allDBI[dbutils.PlainStateBucket]) + require.NoError(err) + + k, v, err := c.Get([]byte(accKey), nil, lmdb.Set) + require.NoError(err) + require.Equal([]byte(accKey), k) + require.Equal([]byte{1}, v) + + keyLen := common.AddressLength + common.IncarnationLength + k, v, err = c.Get([]byte(storageKey)[:keyLen], []byte(storageKey)[keyLen:], lmdb.GetBothRange) + require.NoError(err) + require.Equal([]byte(storageKey)[:keyLen], k) + require.Equal([]byte(storageKey)[keyLen:], v[:common.HashLength]) + require.Equal([]byte{2}, v[common.HashLength:]) +} diff --git a/migrations/migrations.go b/migrations/migrations.go index d4451274237..c62de04e24c 100644 --- a/migrations/migrations.go +++ b/migrations/migrations.go @@ -57,7 +57,8 @@ var migrations = []Migration{ unwindStagesToUseNamedKeys, stagedsyncToUseStageBlockhashes, unwindStagedsyncToUseStageBlockhashes, - dupsortHashState, + dupSortHashState, + dupSortPlainState, } type Migration struct {