From 71a442ffbcbef5d83919b2a7b48f8ea4c323f001 Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Mon, 16 Jan 2017 16:21:31 +0000 Subject: [PATCH 1/6] trie: Make iterator postorder instead of preorder --- trie/iterator.go | 83 ++++++++++++++++++++----------------------- trie/iterator_test.go | 2 ++ 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/trie/iterator.go b/trie/iterator.go index afde6e19e6185..4722b99269e03 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -132,70 +132,63 @@ func (it *NodeIterator) step() error { state.hash = root } it.stack = append(it.stack, state) - } else { - // Continue iterating at the previous node otherwise. - it.stack = it.stack[:len(it.stack)-1] - if len(it.stack) == 0 { - it.trie = nil - return nil - } + return nil } // Continue iteration to the next child for { + if len(it.stack) == 0 { + it.trie = nil + return nil + } parent := it.stack[len(it.stack)-1] ancestor := parent.hash if (ancestor == common.Hash{}) { ancestor = parent.parent } if node, ok := parent.node.(*fullNode); ok { - // Full node, traverse all children, then the node itself - if parent.child >= len(node.Children) { + // Full node, iterate over children + parent.child++ + if parent.child < len(node.Children) { + it.stack = append(it.stack, &nodeIteratorState{ + hash: common.BytesToHash(node.flags.hash), + node: node.Children[parent.child], + parent: ancestor, + child: -1, + }) break } - for parent.child++; parent.child < len(node.Children); parent.child++ { - if current := node.Children[parent.child]; current != nil { - it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(node.flags.hash), - node: current, - parent: ancestor, - child: -1, - }) - break - } - } } else if node, ok := parent.node.(*shortNode); ok { - // Short node, traverse the pointer singleton child, then the node itself - if parent.child >= 0 { + // Short node, return the pointer singleton child + if parent.child < 0 { + parent.child++ + it.stack = append(it.stack, &nodeIteratorState{ + hash: common.BytesToHash(node.flags.hash), + node: node.Val, + parent: ancestor, + child: -1, + }) break } - parent.child++ - it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(node.flags.hash), - node: node.Val, - parent: ancestor, - child: -1, - }) } else if hash, ok := parent.node.(hashNode); ok { - // Hash node, resolve the hash child from the database, then the node itself - if parent.child >= 0 { - break - } - parent.child++ + // Hash node, resolve the hash child from the database + if parent.child < 0 { + parent.child++ - node, err := it.trie.resolveHash(hash, nil, nil) - if err != nil { - return err + node, err := it.trie.resolveHash(hash, nil, nil) + if err != nil { + return err + } + it.stack = append(it.stack, &nodeIteratorState{ + hash: common.BytesToHash(hash), + node: node, + parent: ancestor, + child: -1, + }) + break } - it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(hash), - node: node, - parent: ancestor, - child: -1, - }) - } else { - break } + it.stack = it.stack[:len(it.stack)-1] } return nil } diff --git a/trie/iterator_test.go b/trie/iterator_test.go index c56ac85be5fdc..41672d8d4fefd 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -17,6 +17,7 @@ package trie import ( + "fmt" "testing" "github.com/ethereum/go-ethereum/common" @@ -44,6 +45,7 @@ func TestIterator(t *testing.T) { found := make(map[string]string) it := NewIterator(trie) for it.Next() { + fmt.Printf("%s=%s\n", it.Key, it.Value) found[string(it.Key)] = string(it.Value) } From 4d7c0cae7288b52decdd6b6a74f9552cddc64304 Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Mon, 16 Jan 2017 17:37:35 +0000 Subject: [PATCH 2/6] trie, core/state: Add 'children' parameter to TrieIterator.next() --- core/state/iterator.go | 6 +++--- trie/iterator.go | 16 +++++++++++----- trie/iterator_test.go | 2 +- trie/sync_test.go | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/core/state/iterator.go b/core/state/iterator.go index a58a15ad39ff1..3e82dca099c4a 100644 --- a/core/state/iterator.go +++ b/core/state/iterator.go @@ -80,7 +80,7 @@ func (it *NodeIterator) step() error { } // If we had data nodes previously, we surely have at least state nodes if it.dataIt != nil { - if cont := it.dataIt.Next(); !cont { + if cont := it.dataIt.Next(true); !cont { if it.dataIt.Error != nil { return it.dataIt.Error } @@ -94,7 +94,7 @@ func (it *NodeIterator) step() error { return nil } // Step to the next state trie node, terminating if we're out of nodes - if cont := it.stateIt.Next(); !cont { + if cont := it.stateIt.Next(true); !cont { if it.stateIt.Error != nil { return it.stateIt.Error } @@ -120,7 +120,7 @@ func (it *NodeIterator) step() error { return err } it.dataIt = trie.NewNodeIterator(dataTrie) - if !it.dataIt.Next() { + if !it.dataIt.Next(true) { it.dataIt = nil } if !bytes.Equal(account.CodeHash, emptyCodeHash) { diff --git a/trie/iterator.go b/trie/iterator.go index 4722b99269e03..75f6fe14e4389 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -40,7 +40,7 @@ func NewIterator(trie *Trie) *Iterator { // Next moves the iterator forward one key-value entry. func (it *Iterator) Next() bool { - for it.nodeIt.Next() { + for it.nodeIt.Next(true) { if it.nodeIt.Leaf { it.Key = it.makeKey() it.Value = it.nodeIt.LeafBlob @@ -104,14 +104,15 @@ func NewNodeIterator(trie *Trie) *NodeIterator { // Next moves the iterator to the next node, returning whether there are any // further nodes. In case of an internal error this method returns false and -// sets the Error field to the encountered failure. -func (it *NodeIterator) Next() bool { +// sets the Error field to the encountered failure. If `children` is false, +// skips iterating over any subnodes of the current node. +func (it *NodeIterator) Next(children bool) bool { // If the iterator failed previously, don't do anything if it.Error != nil { return false } // Otherwise step forward with the iterator and report any errors - if err := it.step(); err != nil { + if err := it.step(children); err != nil { it.Error = err return false } @@ -119,7 +120,7 @@ func (it *NodeIterator) Next() bool { } // step moves the iterator to the next node of the trie. -func (it *NodeIterator) step() error { +func (it *NodeIterator) step(children bool) error { if it.trie == nil { // Abort if we reached the end of the iteration return nil @@ -135,6 +136,11 @@ func (it *NodeIterator) step() error { return nil } + if !children { + // If we're skipping children, pop the current node first + it.stack = it.stack[:len(it.stack)-1] + } + // Continue iteration to the next child for { if len(it.stack) == 0 { diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 41672d8d4fefd..0582e8d7528b5 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -101,7 +101,7 @@ func TestNodeIteratorCoverage(t *testing.T) { // Gather all the node hashes found by the iterator hashes := make(map[common.Hash]struct{}) - for it := NewNodeIterator(trie); it.Next(); { + for it := NewNodeIterator(trie); it.Next(true); { if it.Hash != (common.Hash{}) { hashes[it.Hash] = struct{}{} } diff --git a/trie/sync_test.go b/trie/sync_test.go index 4168c4d658014..547a6938f1f49 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -81,7 +81,7 @@ func checkTrieConsistency(db Database, root common.Hash) error { return nil // // Consider a non existent state consistent } it := NewNodeIterator(trie) - for it.Next() { + for it.Next(true) { } return it.Error } From 2b78c04da0ab6ec0893a0a5f48d2009ec60edfc5 Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Tue, 17 Jan 2017 12:05:22 +0000 Subject: [PATCH 3/6] Move key generation to NodeIterator --- trie/iterator.go | 69 +++++++++++++++++++------------------------ trie/iterator_test.go | 2 -- 2 files changed, 30 insertions(+), 41 deletions(-) diff --git a/trie/iterator.go b/trie/iterator.go index 75f6fe14e4389..6610ff368f2c6 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -22,7 +22,6 @@ import "github.com/ethereum/go-ethereum/common" type Iterator struct { trie *Trie nodeIt *NodeIterator - keyBuf []byte Key []byte // Current data key on which the iterator is positioned on Value []byte // Current data value on which the iterator is positioned on @@ -33,7 +32,6 @@ func NewIterator(trie *Trie) *Iterator { return &Iterator{ trie: trie, nodeIt: NewNodeIterator(trie), - keyBuf: make([]byte, 0, 64), Key: nil, } } @@ -42,7 +40,7 @@ func NewIterator(trie *Trie) *Iterator { func (it *Iterator) Next() bool { for it.nodeIt.Next(true) { if it.nodeIt.Leaf { - it.Key = it.makeKey() + it.Key = decodeCompact(it.nodeIt.path) it.Value = it.nodeIt.LeafBlob return true } @@ -52,32 +50,14 @@ func (it *Iterator) Next() bool { return false } -func (it *Iterator) makeKey() []byte { - key := it.keyBuf[:0] - for _, se := range it.nodeIt.stack { - switch node := se.node.(type) { - case *fullNode: - if se.child <= 16 { - key = append(key, byte(se.child)) - } - case *shortNode: - if hasTerm(node.Key) { - key = append(key, node.Key[:len(node.Key)-1]...) - } else { - key = append(key, node.Key...) - } - } - } - return decodeCompact(key) -} - // nodeIteratorState represents the iteration state at one particular node of the // trie, which can be resumed at a later invocation. type nodeIteratorState struct { - hash common.Hash // Hash of the node being iterated (nil if not standalone) - node node // Trie node being iterated - parent common.Hash // Hash of the first full ancestor node (nil if current is the root) - child int // Child to be processed next + hash common.Hash // Hash of the node being iterated (nil if not standalone) + node node // Trie node being iterated + parent common.Hash // Hash of the first full ancestor node (nil if current is the root) + child int // Child to be processed next + pathlen int // Length of the path to this node } // NodeIterator is an iterator to traverse the trie post-order. @@ -90,6 +70,7 @@ type NodeIterator struct { Parent common.Hash // Hash of the first full ancestor node (nil if current is the root) Leaf bool // Flag whether the current node is a value (data) node LeafBlob []byte // Data blob contained within a leaf (otherwise nil) + path []byte // Path to the current node Error error // Failure set in case of an internal error in the iterator } @@ -138,6 +119,7 @@ func (it *NodeIterator) step(children bool) error { if !children { // If we're skipping children, pop the current node first + it.path = it.path[:it.stack[len(it.stack)-1].pathlen] it.stack = it.stack[:len(it.stack)-1] } @@ -157,11 +139,13 @@ func (it *NodeIterator) step(children bool) error { parent.child++ if parent.child < len(node.Children) { it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(node.flags.hash), - node: node.Children[parent.child], - parent: ancestor, - child: -1, + hash: common.BytesToHash(node.flags.hash), + node: node.Children[parent.child], + parent: ancestor, + child: -1, + pathlen: len(it.path), }) + it.path = append(it.path, byte(parent.child)) break } } else if node, ok := parent.node.(*shortNode); ok { @@ -169,31 +153,38 @@ func (it *NodeIterator) step(children bool) error { if parent.child < 0 { parent.child++ it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(node.flags.hash), - node: node.Val, - parent: ancestor, - child: -1, + hash: common.BytesToHash(node.flags.hash), + node: node.Val, + parent: ancestor, + child: -1, + pathlen: len(it.path), }) + if hasTerm(node.Key) { + it.path = append(it.path, node.Key[:len(node.Key)-1]...) + } else { + it.path = append(it.path, node.Key...) + } break } } else if hash, ok := parent.node.(hashNode); ok { // Hash node, resolve the hash child from the database if parent.child < 0 { parent.child++ - node, err := it.trie.resolveHash(hash, nil, nil) if err != nil { return err } it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(hash), - node: node, - parent: ancestor, - child: -1, + hash: common.BytesToHash(hash), + node: node, + parent: ancestor, + child: -1, + pathlen: len(it.path), }) break } } + it.path = it.path[:parent.pathlen] it.stack = it.stack[:len(it.stack)-1] } return nil diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 0582e8d7528b5..89aa8cc15d8be 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -17,7 +17,6 @@ package trie import ( - "fmt" "testing" "github.com/ethereum/go-ethereum/common" @@ -45,7 +44,6 @@ func TestIterator(t *testing.T) { found := make(map[string]string) it := NewIterator(trie) for it.Next() { - fmt.Printf("%s=%s\n", it.Key, it.Value) found[string(it.Key)] = string(it.Value) } From f105ac06449b8a405f8e6dc8c46229d8342c2e49 Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Tue, 17 Jan 2017 14:47:35 +0000 Subject: [PATCH 4/6] core/state, trie: Rewrite NodeIterator as an interface --- core/state/iterator.go | 27 +++++----- trie/iterator.go | 120 ++++++++++++++++++++++++++--------------- trie/iterator_test.go | 4 +- trie/secure_trie.go | 2 +- trie/sync_test.go | 2 +- 5 files changed, 93 insertions(+), 62 deletions(-) diff --git a/core/state/iterator.go b/core/state/iterator.go index 3e82dca099c4a..170aec9833541 100644 --- a/core/state/iterator.go +++ b/core/state/iterator.go @@ -31,15 +31,14 @@ import ( type NodeIterator struct { state *StateDB // State being iterated - stateIt *trie.NodeIterator // Primary iterator for the global state trie - dataIt *trie.NodeIterator // Secondary iterator for the data trie of a contract + stateIt trie.NodeIterator // Primary iterator for the global state trie + dataIt trie.NodeIterator // Secondary iterator for the data trie of a contract accountHash common.Hash // Hash of the node containing the account codeHash common.Hash // Hash of the contract source code code []byte // Source code associated with a contract Hash common.Hash // Hash of the current entry being iterated (nil if not standalone) - Entry interface{} // Current state entry being iterated (internal representation) Parent common.Hash // Hash of the first full ancestor node (nil if current is the root) Error error // Failure set in case of an internal error in the iterator @@ -81,8 +80,8 @@ func (it *NodeIterator) step() error { // If we had data nodes previously, we surely have at least state nodes if it.dataIt != nil { if cont := it.dataIt.Next(true); !cont { - if it.dataIt.Error != nil { - return it.dataIt.Error + if it.dataIt.Error() != nil { + return it.dataIt.Error() } it.dataIt = nil } @@ -95,14 +94,14 @@ func (it *NodeIterator) step() error { } // Step to the next state trie node, terminating if we're out of nodes if cont := it.stateIt.Next(true); !cont { - if it.stateIt.Error != nil { - return it.stateIt.Error + if it.stateIt.Error() != nil { + return it.stateIt.Error() } it.state, it.stateIt = nil, nil return nil } // If the state trie node is an internal entry, leave as is - if !it.stateIt.Leaf { + if !it.stateIt.Leaf() { return nil } // Otherwise we've reached an account node, initiate data iteration @@ -112,7 +111,7 @@ func (it *NodeIterator) step() error { Root common.Hash CodeHash []byte } - if err := rlp.Decode(bytes.NewReader(it.stateIt.LeafBlob), &account); err != nil { + if err := rlp.Decode(bytes.NewReader(it.stateIt.LeafBlob()), &account); err != nil { return err } dataTrie, err := trie.New(account.Root, it.state.db) @@ -130,7 +129,7 @@ func (it *NodeIterator) step() error { return fmt.Errorf("code %x: %v", account.CodeHash, err) } } - it.accountHash = it.stateIt.Parent + it.accountHash = it.stateIt.Parent() return nil } @@ -138,7 +137,7 @@ func (it *NodeIterator) step() error { // The method returns whether there are any more data left for inspection. func (it *NodeIterator) retrieve() bool { // Clear out any previously set values - it.Hash, it.Entry = common.Hash{}, nil + it.Hash = common.Hash{} // If the iteration's done, return no available data if it.state == nil { @@ -147,14 +146,14 @@ func (it *NodeIterator) retrieve() bool { // Otherwise retrieve the current entry switch { case it.dataIt != nil: - it.Hash, it.Entry, it.Parent = it.dataIt.Hash, it.dataIt.Node, it.dataIt.Parent + it.Hash, it.Parent = it.dataIt.Hash(), it.dataIt.Parent() if it.Parent == (common.Hash{}) { it.Parent = it.accountHash } case it.code != nil: - it.Hash, it.Entry, it.Parent = it.codeHash, it.code, it.accountHash + it.Hash, it.Parent = it.codeHash, it.accountHash case it.stateIt != nil: - it.Hash, it.Entry, it.Parent = it.stateIt.Hash, it.stateIt.Node, it.stateIt.Parent + it.Hash, it.Parent = it.stateIt.Hash(), it.stateIt.Parent() } return true } diff --git a/trie/iterator.go b/trie/iterator.go index 6610ff368f2c6..cbfab3dd41314 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -21,7 +21,7 @@ import "github.com/ethereum/go-ethereum/common" // Iterator is a key-value trie iterator that traverses a Trie. type Iterator struct { trie *Trie - nodeIt *NodeIterator + nodeIt NodeIterator Key []byte // Current data key on which the iterator is positioned on Value []byte // Current data value on which the iterator is positioned on @@ -39,9 +39,9 @@ func NewIterator(trie *Trie) *Iterator { // Next moves the iterator forward one key-value entry. func (it *Iterator) Next() bool { for it.nodeIt.Next(true) { - if it.nodeIt.Leaf { - it.Key = decodeCompact(it.nodeIt.path) - it.Value = it.nodeIt.LeafBlob + if it.nodeIt.Leaf() { + it.Key = decodeCompact(it.nodeIt.Path()) + it.Value = it.nodeIt.LeafBlob() return true } } @@ -50,6 +50,17 @@ func (it *Iterator) Next() bool { return false } +// NodeIterator is an iterator to traverse the trie pre-order. +type NodeIterator interface { + Hash() common.Hash + Parent() common.Hash + Leaf() bool + LeafBlob() []byte + Path() []byte + Next(bool) bool + Error() error +} + // nodeIteratorState represents the iteration state at one particular node of the // trie, which can be resumed at a later invocation. type nodeIteratorState struct { @@ -60,48 +71,92 @@ type nodeIteratorState struct { pathlen int // Length of the path to this node } -// NodeIterator is an iterator to traverse the trie post-order. -type NodeIterator struct { +type nodeIterator struct { trie *Trie // Trie being iterated stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state - Hash common.Hash // Hash of the current node being iterated (nil if not standalone) - Node node // Current node being iterated (internal representation) - Parent common.Hash // Hash of the first full ancestor node (nil if current is the root) - Leaf bool // Flag whether the current node is a value (data) node - LeafBlob []byte // Data blob contained within a leaf (otherwise nil) - path []byte // Path to the current node + err error // Failure set in case of an internal error in the iterator - Error error // Failure set in case of an internal error in the iterator + path []byte // Path to the current node } // NewNodeIterator creates an post-order trie iterator. -func NewNodeIterator(trie *Trie) *NodeIterator { +func NewNodeIterator(trie *Trie) NodeIterator { if trie.Hash() == emptyState { - return new(NodeIterator) + return new(nodeIterator) + } + return &nodeIterator{trie: trie} +} + +// Hash returns the hash of the current node +func (it *nodeIterator) Hash() common.Hash { + if it.trie == nil { + return common.Hash{} + } + + return it.stack[len(it.stack)-1].hash +} + +// Parent returns the hash of the parent node +func (it *nodeIterator) Parent() common.Hash { + if it.trie == nil { + return common.Hash{} + } + + return it.stack[len(it.stack)-1].parent +} + +// Leaf returns true if the current node is a leaf +func (it *nodeIterator) Leaf() bool { + if it.trie == nil { + return false } - return &NodeIterator{trie: trie} + + _, ok := it.stack[len(it.stack)-1].node.(valueNode) + return ok +} + +// LeafBlob returns the data for the current node, if it is a leaf +func (it *nodeIterator) LeafBlob() []byte { + if it.trie == nil { + return nil + } + + if node, ok := it.stack[len(it.stack)-1].node.(valueNode); ok { + return []byte(node) + } + return nil +} + +// Path returns the hex-encoded path to the current node +func (it *nodeIterator) Path() []byte { + return it.path +} + +// Error returns the error set in case of an internal error in the iterator +func (it *nodeIterator) Error() error { + return it.err } // Next moves the iterator to the next node, returning whether there are any // further nodes. In case of an internal error this method returns false and // sets the Error field to the encountered failure. If `children` is false, // skips iterating over any subnodes of the current node. -func (it *NodeIterator) Next(children bool) bool { +func (it *nodeIterator) Next(children bool) bool { // If the iterator failed previously, don't do anything - if it.Error != nil { + if it.err != nil { return false } // Otherwise step forward with the iterator and report any errors if err := it.step(children); err != nil { - it.Error = err + it.err = err return false } - return it.retrieve() + return it.trie != nil } // step moves the iterator to the next node of the trie. -func (it *NodeIterator) step(children bool) error { +func (it *nodeIterator) step(children bool) error { if it.trie == nil { // Abort if we reached the end of the iteration return nil @@ -189,26 +244,3 @@ func (it *NodeIterator) step(children bool) error { } return nil } - -// retrieve pulls and caches the current trie node the iterator is traversing. -// In case of a value node, the additional leaf blob is also populated with the -// data contents for external interpretation. -// -// The method returns whether there are any more data left for inspection. -func (it *NodeIterator) retrieve() bool { - // Clear out any previously set values - it.Hash, it.Node, it.Parent, it.Leaf, it.LeafBlob = common.Hash{}, nil, common.Hash{}, false, nil - - // If the iteration's done, return no available data - if it.trie == nil { - return false - } - // Otherwise retrieve the current node and resolve leaf accessors - state := it.stack[len(it.stack)-1] - - it.Hash, it.Node, it.Parent = state.hash, state.node, state.parent - if value, ok := it.Node.(valueNode); ok { - it.Leaf, it.LeafBlob = true, []byte(value) - } - return true -} diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 89aa8cc15d8be..adfc30d460887 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -100,8 +100,8 @@ func TestNodeIteratorCoverage(t *testing.T) { // Gather all the node hashes found by the iterator hashes := make(map[common.Hash]struct{}) for it := NewNodeIterator(trie); it.Next(true); { - if it.Hash != (common.Hash{}) { - hashes[it.Hash] = struct{}{} + if it.Hash() != (common.Hash{}) { + hashes[it.Hash()] = struct{}{} } } // Cross check the hashes and the database itself diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 4d9ebe4d37af4..8b90da02fe007 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -159,7 +159,7 @@ func (t *SecureTrie) Iterator() *Iterator { return t.trie.Iterator() } -func (t *SecureTrie) NodeIterator() *NodeIterator { +func (t *SecureTrie) NodeIterator() NodeIterator { return NewNodeIterator(&t.trie) } diff --git a/trie/sync_test.go b/trie/sync_test.go index 547a6938f1f49..acae039cd5a2d 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -83,7 +83,7 @@ func checkTrieConsistency(db Database, root common.Hash) error { it := NewNodeIterator(trie) for it.Next(true) { } - return it.Error + return it.Error() } // Tests that an empty trie is not scheduled for syncing. From d21b527e54caec85f134dd166a926fd57f970116 Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Thu, 2 Feb 2017 18:17:37 +0000 Subject: [PATCH 5/6] trie: Implement differenceIterator --- trie/iterator.go | 140 +++++++++++++++++++++++++++++++++++++----- trie/iterator_test.go | 57 +++++++++++++++++ 2 files changed, 182 insertions(+), 15 deletions(-) diff --git a/trie/iterator.go b/trie/iterator.go index cbfab3dd41314..1ff13226e0608 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -16,11 +16,13 @@ package trie -import "github.com/ethereum/go-ethereum/common" +import ( + "bytes" + "github.com/ethereum/go-ethereum/common" +) // Iterator is a key-value trie iterator that traverses a Trie. type Iterator struct { - trie *Trie nodeIt NodeIterator Key []byte // Current data key on which the iterator is positioned on @@ -30,12 +32,19 @@ type Iterator struct { // NewIterator creates a new key-value iterator. func NewIterator(trie *Trie) *Iterator { return &Iterator{ - trie: trie, nodeIt: NewNodeIterator(trie), Key: nil, } } +// FromNodeIterator creates a new key-value iterator from a node iterator +func FromNodeIterator(it NodeIterator) *Iterator { + return &Iterator{ + nodeIt: it, + Key: nil, + } +} + // Next moves the iterator forward one key-value entry. func (it *Iterator) Next() bool { for it.nodeIt.Next(true) { @@ -179,6 +188,7 @@ func (it *nodeIterator) step(children bool) error { } // Continue iteration to the next child +outer: for { if len(it.stack) == 0 { it.trie = nil @@ -191,24 +201,28 @@ func (it *nodeIterator) step(children bool) error { } if node, ok := parent.node.(*fullNode); ok { // Full node, iterate over children - parent.child++ - if parent.child < len(node.Children) { - it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(node.flags.hash), - node: node.Children[parent.child], - parent: ancestor, - child: -1, - pathlen: len(it.path), - }) - it.path = append(it.path, byte(parent.child)) - break + for parent.child++; parent.child < len(node.Children); parent.child++ { + child := node.Children[parent.child] + if child != nil { + hash, _ := child.cache() + it.stack = append(it.stack, &nodeIteratorState{ + hash: common.BytesToHash(hash), + node: child, + parent: ancestor, + child: -1, + pathlen: len(it.path), + }) + it.path = append(it.path, byte(parent.child)) + break outer + } } } else if node, ok := parent.node.(*shortNode); ok { // Short node, return the pointer singleton child if parent.child < 0 { parent.child++ + hash, _ := node.Val.cache() it.stack = append(it.stack, &nodeIteratorState{ - hash: common.BytesToHash(node.flags.hash), + hash: common.BytesToHash(hash), node: node.Val, parent: ancestor, child: -1, @@ -244,3 +258,99 @@ func (it *nodeIterator) step(children bool) error { } return nil } + +type differenceIterator struct { + a, b NodeIterator + eof bool + count int // Number of nodes scanned on either trie +} + +// NewDifferenceIterator constructs a NodeIterator that iterates over elements in b that +// are not in a. +func NewDifferenceIterator(a, b NodeIterator) (NodeIterator, *int) { + a.Next(true) + it := &differenceIterator{ + a: a, + b: b, + eof: false, + count: 0, + } + return it, &it.count +} + +func (it *differenceIterator) Hash() common.Hash { + return it.b.Hash() +} + +func (it *differenceIterator) Parent() common.Hash { + return it.b.Parent() +} + +func (it *differenceIterator) Leaf() bool { + return it.b.Leaf() +} + +func (it *differenceIterator) LeafBlob() []byte { + return it.b.LeafBlob() +} + +func (it *differenceIterator) Path() []byte { + return it.b.Path() +} + +func (it *differenceIterator) Next(bool) bool { + // Invariants: + // - We always advance at least one element in b. + // - At the start of this function, a's path is lexically greater than b's. + if !it.b.Next(true) { + return false + } + it.count += 1 + + if it.eof { + // a has reached eof, so we just return all elements from b + return true + } + + for { + apath, bpath := it.a.Path(), it.b.Path() + cmp := bytes.Compare(apath, bpath) + if cmp < 0 { + // b jumped past a; advance a + if !it.a.Next(true) { + it.eof = true + return true + } + it.count += 1 + } else if cmp > 0 { + // b is before a + return true + } else { + if it.a.Hash() != it.b.Hash() || it.a.Leaf() != it.b.Leaf() { + // Keys are identical, but hashes or leaf status differs + return true + } + if it.a.Leaf() && it.b.Leaf() && !bytes.Equal(it.a.LeafBlob(), it.b.LeafBlob()) { + // Both are leaf nodes, but with different values + return true + } + + // a and b are identical; skip this whole subtree if the nodes have hashes + if !it.b.Next(it.a.Hash() == common.Hash{}) { + return false + } + if !it.a.Next(it.a.Hash() == common.Hash{}) { + it.eof = true + return true + } + it.count += 2 + } + } +} + +func (it *differenceIterator) Error() error { + if err := it.a.Error(); err != nil { + return err + } + return it.b.Error() +} diff --git a/trie/iterator_test.go b/trie/iterator_test.go index adfc30d460887..51ad065cb71e1 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -116,3 +116,60 @@ func TestNodeIteratorCoverage(t *testing.T) { } } } + +func TestDifferenceIterator(t *testing.T) { + triea := newEmpty() + valsa := []struct{ k, v string }{ + {"bar", "b"}, + {"barb", "ba"}, + {"bars", "bb"}, + {"bard", "bc"}, + {"fab", "z"}, + {"foo", "a"}, + {"food", "ab"}, + {"foos", "aa"}, + } + for _, val := range valsa { + triea.Update([]byte(val.k), []byte(val.v)) + } + triea.Commit() + + trieb := newEmpty() + valsb := []struct{ k, v string }{ + {"aardvark", "c"}, + {"bar", "b"}, + {"barb", "bd"}, + {"bars", "be"}, + {"fab", "z"}, + {"foo", "a"}, + {"foos", "aa"}, + {"food", "ab"}, + {"jars", "d"}, + } + for _, val := range valsb { + trieb.Update([]byte(val.k), []byte(val.v)) + } + trieb.Commit() + + found := make(map[string]string) + di, _ := NewDifferenceIterator(NewNodeIterator(triea), NewNodeIterator(trieb)) + it := FromNodeIterator(di) + for it.Next() { + found[string(it.Key)] = string(it.Value) + } + + all := []struct{ k, v string }{ + {"aardvark", "c"}, + {"barb", "bd"}, + {"bars", "be"}, + {"jars", "d"}, + } + for _, item := range all { + if found[item.k] != item.v { + t.Errorf("iterator value mismatch for %s: got %q want %q", item.k, found[item.k], item.v) + } + } + if len(found) != len(all) { + t.Errorf("iterator count mismatch: got %d values, want %d", len(found), len(all)) + } +} From 3015bfd176d098937a640cee0cd4d575a6dd3ff6 Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Tue, 21 Feb 2017 11:08:14 +0000 Subject: [PATCH 6/6] trie: Fixes in response to review --- trie/iterator.go | 63 ++++++++++++++++++++++++------------------- trie/iterator_test.go | 2 +- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/trie/iterator.go b/trie/iterator.go index 1ff13226e0608..234c49ecc8351 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -33,15 +33,13 @@ type Iterator struct { func NewIterator(trie *Trie) *Iterator { return &Iterator{ nodeIt: NewNodeIterator(trie), - Key: nil, } } // FromNodeIterator creates a new key-value iterator from a node iterator -func FromNodeIterator(it NodeIterator) *Iterator { +func NewIteratorFromNodeIterator(it NodeIterator) *Iterator { return &Iterator{ nodeIt: it, - Key: nil, } } @@ -61,12 +59,22 @@ func (it *Iterator) Next() bool { // NodeIterator is an iterator to traverse the trie pre-order. type NodeIterator interface { + // Hash returns the hash of the current node Hash() common.Hash + // Parent returns the hash of the parent of the current node Parent() common.Hash + // Leaf returns true iff the current node is a leaf node. Leaf() bool + // LeafBlob returns the contents of the node, if it is a leaf. + // Callers must not retain references to the return value after calling Next() LeafBlob() []byte + // Path returns the hex-encoded path to the current node. + // Callers must not retain references to the return value after calling Next() Path() []byte + // Next moves the iterator to the next node. If the parameter is false, any child + // nodes will be skipped. Next(bool) bool + // Error returns the error status of the iterator. Error() error } @@ -99,7 +107,7 @@ func NewNodeIterator(trie *Trie) NodeIterator { // Hash returns the hash of the current node func (it *nodeIterator) Hash() common.Hash { - if it.trie == nil { + if len(it.stack) == 0 { return common.Hash{} } @@ -108,7 +116,7 @@ func (it *nodeIterator) Hash() common.Hash { // Parent returns the hash of the parent node func (it *nodeIterator) Parent() common.Hash { - if it.trie == nil { + if len(it.stack) == 0 { return common.Hash{} } @@ -117,7 +125,7 @@ func (it *nodeIterator) Parent() common.Hash { // Leaf returns true if the current node is a leaf func (it *nodeIterator) Leaf() bool { - if it.trie == nil { + if len(it.stack) == 0 { return false } @@ -127,7 +135,7 @@ func (it *nodeIterator) Leaf() bool { // LeafBlob returns the data for the current node, if it is a leaf func (it *nodeIterator) LeafBlob() []byte { - if it.trie == nil { + if len(it.stack) == 0 { return nil } @@ -149,15 +157,15 @@ func (it *nodeIterator) Error() error { // Next moves the iterator to the next node, returning whether there are any // further nodes. In case of an internal error this method returns false and -// sets the Error field to the encountered failure. If `children` is false, +// sets the Error field to the encountered failure. If `descend` is false, // skips iterating over any subnodes of the current node. -func (it *nodeIterator) Next(children bool) bool { +func (it *nodeIterator) Next(descend bool) bool { // If the iterator failed previously, don't do anything if it.err != nil { return false } // Otherwise step forward with the iterator and report any errors - if err := it.step(children); err != nil { + if err := it.step(descend); err != nil { it.err = err return false } @@ -165,7 +173,7 @@ func (it *nodeIterator) Next(children bool) bool { } // step moves the iterator to the next node of the trie. -func (it *nodeIterator) step(children bool) error { +func (it *nodeIterator) step(descend bool) error { if it.trie == nil { // Abort if we reached the end of the iteration return nil @@ -181,7 +189,7 @@ func (it *nodeIterator) step(children bool) error { return nil } - if !children { + if !descend { // If we're skipping children, pop the current node first it.path = it.path[:it.stack[len(it.stack)-1].pathlen] it.stack = it.stack[:len(it.stack)-1] @@ -260,20 +268,19 @@ outer: } type differenceIterator struct { - a, b NodeIterator - eof bool - count int // Number of nodes scanned on either trie + a, b NodeIterator // Nodes returned are those in b - a. + eof bool // Indicates a has run out of elements + count int // Number of nodes scanned on either trie } // NewDifferenceIterator constructs a NodeIterator that iterates over elements in b that -// are not in a. +// are not in a. Returns the iterator, and a pointer to an integer recording the number +// of nodes seen. func NewDifferenceIterator(a, b NodeIterator) (NodeIterator, *int) { a.Next(true) it := &differenceIterator{ - a: a, - b: b, - eof: false, - count: 0, + a: a, + b: b, } return it, &it.count } @@ -314,18 +321,18 @@ func (it *differenceIterator) Next(bool) bool { for { apath, bpath := it.a.Path(), it.b.Path() - cmp := bytes.Compare(apath, bpath) - if cmp < 0 { + switch bytes.Compare(apath, bpath) { + case -1: // b jumped past a; advance a if !it.a.Next(true) { it.eof = true return true } it.count += 1 - } else if cmp > 0 { + case 1: // b is before a return true - } else { + case 0: if it.a.Hash() != it.b.Hash() || it.a.Leaf() != it.b.Leaf() { // Keys are identical, but hashes or leaf status differs return true @@ -336,14 +343,16 @@ func (it *differenceIterator) Next(bool) bool { } // a and b are identical; skip this whole subtree if the nodes have hashes - if !it.b.Next(it.a.Hash() == common.Hash{}) { + hasHash := it.a.Hash() == common.Hash{} + if !it.b.Next(hasHash) { return false } - if !it.a.Next(it.a.Hash() == common.Hash{}) { + it.count += 1 + if !it.a.Next(hasHash) { it.eof = true return true } - it.count += 2 + it.count += 1 } } } diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 51ad065cb71e1..0ad9711eddb3c 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -153,7 +153,7 @@ func TestDifferenceIterator(t *testing.T) { found := make(map[string]string) di, _ := NewDifferenceIterator(NewNodeIterator(triea), NewNodeIterator(trieb)) - it := FromNodeIterator(di) + it := NewIteratorFromNodeIterator(di) for it.Next() { found[string(it.Key)] = string(it.Value) }