Skip to content

Commit

Permalink
MerkleDB Remove ID from Node to reduce size and removal channel creation. (#2324)
Browse files Browse the repository at this point in the history

Co-authored-by: Dan Laine <daniel.laine@avalabs.org>
  • Loading branch information
dboehm-avalabs and danlaine committed Nov 29, 2023
1 parent 56c2ad9 commit c5169a3
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 122 deletions.
4 changes: 2 additions & 2 deletions x/merkledb/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func (c *codecImpl) decodeDBNode(b []byte, n *dbNode) error {
return io.ErrUnexpectedEOF
}

n.children = make(map[byte]child, numChildren)
n.children = make(map[byte]*child, numChildren)
var previousChild uint64
for i := uint64(0); i < numChildren; i++ {
index, err := c.decodeUint(src)
Expand All @@ -184,7 +184,7 @@ func (c *codecImpl) decodeDBNode(b []byte, n *dbNode) error {
if err != nil {
return err
}
n.children[byte(index)] = child{
n.children[byte(index)] = &child{
compressedKey: compressedKey,
id: childID,
hasValue: hasValue,
Expand Down
8 changes: 4 additions & 4 deletions x/merkledb/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,15 @@ func FuzzCodecDBNodeDeterministic(f *testing.F) {

numChildren := r.Intn(int(bf)) // #nosec G404

children := map[byte]child{}
children := map[byte]*child{}
for i := 0; i < numChildren; i++ {
var childID ids.ID
_, _ = r.Read(childID[:]) // #nosec G404

childKeyBytes := make([]byte, r.Intn(32)) // #nosec G404
_, _ = r.Read(childKeyBytes) // #nosec G404

children[byte(i)] = child{
children[byte(i)] = &child{
compressedKey: ToKey(childKeyBytes),
id: childID,
}
Expand Down Expand Up @@ -202,14 +202,14 @@ func FuzzEncodeHashValues(f *testing.F) {
for _, bf := range validBranchFactors { // Create a random node
r := rand.New(rand.NewSource(int64(randSeed))) // #nosec G404

children := map[byte]child{}
children := map[byte]*child{}
numChildren := r.Intn(int(bf)) // #nosec G404
for i := 0; i < numChildren; i++ {
compressedKeyLen := r.Intn(32) // #nosec G404
compressedKeyBytes := make([]byte, compressedKeyLen)
_, _ = r.Read(compressedKeyBytes) // #nosec G404

children[byte(i)] = child{
children[byte(i)] = &child{
compressedKey: ToKey(compressedKeyBytes),
id: ids.GenerateTestID(),
hasValue: r.Intn(2) == 1, // #nosec G404
Expand Down
59 changes: 29 additions & 30 deletions x/merkledb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ type merkleDB struct {
// It is the node with a nil key and is the ancestor of all nodes in the trie.
// If it has a value or has multiple children, it is also the root of the trie.
sentinelNode *node
rootID ids.ID

// Valid children of this trie.
childViews []*trieView
Expand Down Expand Up @@ -260,14 +261,13 @@ func newDatabase(
tokenSize: BranchFactorToTokenSize[config.BranchFactor],
}

root, err := trieDB.initializeRootIfNeeded()
if err != nil {
if err := trieDB.initializeRoot(); err != nil {
return nil, err
}

// add current root to history (has no changes)
trieDB.history.record(&changeSummary{
rootID: root,
rootID: trieDB.rootID,
values: map[Key]*change[maybe.Maybe[[]byte]]{},
nodes: map[Key]*change[*node]{},
})
Expand Down Expand Up @@ -578,13 +578,7 @@ func (db *merkleDB) GetMerkleRoot(ctx context.Context) (ids.ID, error) {

// Assumes [db.lock] is read locked.
func (db *merkleDB) getMerkleRoot() ids.ID {
if !isSentinelNodeTheRoot(db.sentinelNode) {
// if the sentinel node should be skipped, the trie's root is the nil key node's only child
for _, childEntry := range db.sentinelNode.children {
return childEntry.id
}
}
return db.sentinelNode.id
return db.rootID
}

// isSentinelNodeTheRoot returns true if the passed in sentinel node has a value and/or multiple child nodes
Expand Down Expand Up @@ -982,6 +976,7 @@ func (db *merkleDB) commitChanges(ctx context.Context, trieToCommit *trieView) e
// Only modify in-memory state after the commit succeeds
// so that we don't need to clean up on error.
db.sentinelNode = sentinelChange.after
db.rootID = changes.rootID
db.history.record(changes)
return nil
}
Expand Down Expand Up @@ -1161,34 +1156,38 @@ func (db *merkleDB) invalidateChildrenExcept(exception *trieView) {
}
}

func (db *merkleDB) initializeRootIfNeeded() (ids.ID, error) {
// not sure if the sentinel node exists or if it had a value
// check under both prefixes
func (db *merkleDB) initializeRoot() error {
// Not sure if the sentinel node exists or if it had a value,
// so check under both prefixes
var err error
db.sentinelNode, err = db.intermediateNodeDB.Get(Key{})

if errors.Is(err, database.ErrNotFound) {
// Didn't find the sentinel in the intermediateNodeDB, check the valueNodeDB
db.sentinelNode, err = db.valueNodeDB.Get(Key{})
}
if err == nil {
// sentinel node already exists, so calculate the root ID of the trie
db.sentinelNode.calculateID(db.metrics)
return db.getMerkleRoot(), nil
}
if !errors.Is(err, database.ErrNotFound) {
return ids.Empty, err
}

// sentinel node doesn't exist; make a new one.
db.sentinelNode = newNode(Key{})

// update its ID
db.sentinelNode.calculateID(db.metrics)
if err != nil {
if !errors.Is(err, database.ErrNotFound) {
return err
}

if err := db.intermediateNodeDB.Put(Key{}, db.sentinelNode); err != nil {
return ids.Empty, err
// Sentinel node doesn't exist in either database prefix.
// Make a new one and store it in the intermediateNodeDB
db.sentinelNode = newNode(Key{})
if err := db.intermediateNodeDB.Put(Key{}, db.sentinelNode); err != nil {
return err
}
}

return db.sentinelNode.id, nil
db.rootID = db.sentinelNode.calculateID(db.metrics)
if !isSentinelNodeTheRoot(db.sentinelNode) {
// If the sentinel node is not the root, the trie's root is the sentinel node's only child
for _, childEntry := range db.sentinelNode.children {
db.rootID = childEntry.id
}
}
return nil
}

// Returns a view of the trie as it was when it had root [rootID] for keys within range [start, end].
Expand Down Expand Up @@ -1289,7 +1288,7 @@ func (db *merkleDB) Clear() error {

// Clear root
db.sentinelNode = newNode(Key{})
db.sentinelNode.calculateID(db.metrics)
db.rootID = db.sentinelNode.calculateID(db.metrics)

// Clear history
db.history = newTrieHistory(db.history.maxHistoryLen)
Expand Down
4 changes: 2 additions & 2 deletions x/merkledb/history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -660,8 +660,8 @@ func TestHistoryGetChangesToRoot(t *testing.T) {
rootID: ids.GenerateTestID(),
nodes: map[Key]*change[*node]{
ToKey([]byte{byte(i)}): {
before: &node{id: ids.GenerateTestID()},
after: &node{id: ids.GenerateTestID()},
before: &node{},
after: &node{},
},
},
values: map[Key]*change[maybe.Maybe[[]byte]]{
Expand Down
33 changes: 16 additions & 17 deletions x/merkledb/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package merkledb

import (
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"

"github.com/ava-labs/avalanchego/ids"
Expand All @@ -17,7 +16,7 @@ const HashLength = 32
// Representation of a node stored in the database.
type dbNode struct {
value maybe.Maybe[[]byte]
children map[byte]child
children map[byte]*child
}

type child struct {
Expand All @@ -29,7 +28,6 @@ type child struct {
// node holds additional information on top of the dbNode that makes calculations easier to do
type node struct {
dbNode
id ids.ID
key Key
nodeBytes []byte
valueDigest maybe.Maybe[[]byte]
Expand All @@ -39,7 +37,7 @@ type node struct {
func newNode(key Key) *node {
return &node{
dbNode: dbNode{
children: make(map[byte]child, 2),
children: make(map[byte]*child, 2),
},
key: key,
}
Expand Down Expand Up @@ -78,19 +76,14 @@ func (n *node) bytes() []byte {
// clear the cached values that will need to be recalculated whenever the node changes
// for example, the byte representation (the node ID is no longer cached on the node)
func (n *node) onNodeChanged() {
n.id = ids.Empty
n.nodeBytes = nil
}

// Returns the ID of this node, computed by hashing its encoded hash values.
func (n *node) calculateID(metrics merkleMetrics) {
if n.id != ids.Empty {
return
}

func (n *node) calculateID(metrics merkleMetrics) ids.ID {
metrics.HashCalculated()
bytes := codec.encodeHashValues(n)
n.id = hashing.ComputeHash256Array(bytes)
return hashing.ComputeHash256Array(bytes)
}

// Set [n]'s value to [val].
Expand All @@ -114,16 +107,15 @@ func (n *node) setValueDigest() {
func (n *node) addChild(childNode *node, tokenSize int) {
n.setChildEntry(
childNode.key.Token(n.key.length, tokenSize),
child{
&child{
compressedKey: childNode.key.Skip(n.key.length + tokenSize),
id: childNode.id,
hasValue: childNode.hasValue(),
},
)
}

// Adds a child to [n] without a reference to the child node.
func (n *node) setChildEntry(index byte, childEntry child) {
func (n *node) setChildEntry(index byte, childEntry *child) {
n.onNodeChanged()
n.children[index] = childEntry
}
Expand All @@ -139,16 +131,23 @@ func (n *node) removeChild(child *node, tokenSize int) {
// if this ever changes, value will need to be copied as well
// it is safe to clone all fields because they are only written/read while one or both of the db locks are held
func (n *node) clone() *node {
return &node{
id: n.id,
result := &node{
key: n.key,
dbNode: dbNode{
value: n.value,
children: maps.Clone(n.children),
children: make(map[byte]*child, len(n.children)),
},
valueDigest: n.valueDigest,
nodeBytes: n.nodeBytes,
}
for key, existing := range n.children {
result.children[key] = &child{
compressedKey: existing.compressedKey,
id: existing.id,
hasValue: existing.hasValue,
}
}
return result
}

// Returns the ProofNode representation of this node.
Expand Down
2 changes: 1 addition & 1 deletion x/merkledb/proof.go
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ func addPathInfo(
// We only need the IDs to be correct so that the calculated hash is correct.
n.setChildEntry(
index,
child{
&child{
id: childID,
compressedKey: compressedKey,
})
Expand Down

0 comments on commit c5169a3

Please sign in to comment.