From 329cf0c471867982b523a754ec8c10e342bc57d6 Mon Sep 17 00:00:00 2001 From: NIC619 Date: Thu, 21 Jun 2018 23:14:20 +0800 Subject: [PATCH 01/11] Add constants EMPTY_LEAF_NODE_HASH and EMPTY_NODE_HASHES --- trie/constants.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/trie/constants.py b/trie/constants.py index 6b9be54a..93f5140e 100644 --- a/trie/constants.py +++ b/trie/constants.py @@ -34,3 +34,10 @@ BYTE_1 = bytes([1]) BYTE_0 = bytes([0]) + +# Constants for Sparse Merkle Tree +from eth_hash.auto import keccak +EMPTY_LEAF_NODE_HASH = BLANK_HASH +EMPTY_NODE_HASHES = [EMPTY_LEAF_NODE_HASH] +for _ in range(159): + EMPTY_NODE_HASHES.insert(0, keccak(EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0])) From 27cdb66cbff555815b29789d63525691c4a77da3 Mon Sep 17 00:00:00 2001 From: NIC619 Date: Sat, 23 Jun 2018 12:05:31 +0800 Subject: [PATCH 02/11] Add basic get/set method in Sparse Merkle Tree --- trie/sparse_merkle_tree.py | 88 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 trie/sparse_merkle_tree.py diff --git a/trie/sparse_merkle_tree.py b/trie/sparse_merkle_tree.py new file mode 100644 index 00000000..06c128a4 --- /dev/null +++ b/trie/sparse_merkle_tree.py @@ -0,0 +1,88 @@ +from eth_hash.auto import ( + keccak, +) + +from trie.constants import ( + EMPTY_LEAF_NODE_HASH, + EMPTY_NODE_HASHES, +) +from trie.validation import ( + validate_is_bytes, + validate_length, +) + + +# sanity check +assert EMPTY_LEAF_NODE_HASH == keccak(b'') + +class SparseMerkleTree: + def __init__(self, db): + self.db = db + # Initialize an empty tree with one branch + self.root_hash = keccak(EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0]) + self.db[self.root_hash] = EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0] + for i in range(159): + self.db[EMPTY_NODE_HASHES[i]] = EMPTY_NODE_HASHES[i+1] + EMPTY_NODE_HASHES[i+1] + self.db[EMPTY_LEAF_NODE_HASH] = b'' + + def get(self, key): + validate_is_bytes(key) + validate_length(key, 20) + + target_bit = 1 << 159 + path = int.from_bytes(key, byteorder='big') + node_hash = self.root_hash + for i in range(160): + if path & target_bit: + node_hash = self.db[node_hash][32:] + else: + node_hash = self.db[node_hash][:32] + target_bit >>= 1 + + if self.db[node_hash] is b'': + return None + else: + return self.db[node_hash] + + def set(self, key, value): + validate_is_bytes(key) + validate_length(key, 20) + validate_is_bytes(value) + + first_target_bit = 1 << 159 + path = int.from_bytes(key, byteorder='big') + node_hash = self.root_hash + sibling_node_hashes = [] + # Record the sibling nodes along the way + for i in range(160): + if path & first_target_bit: + sibling_node_hashes.append(self.db[node_hash][:32]) + node_hash = self.db[node_hash][32:] + else: + sibling_node_hashes.append(self.db[node_hash][32:]) + node_hash = self.db[node_hash][:32] + first_target_bit >>= 1 + + second_target_bit = 1 + node_hash = self._hash_and_save(value) + for i in range(160): + sibling_node_hash = sibling_node_hashes.pop() + if (path & second_target_bit): + parent_node_hash = self._hash_and_save(sibling_node_hash + node_hash) + else: + parent_node_hash = self._hash_and_save(node_hash + sibling_node_hash) + second_target_bit <<= 1 + node_hash = parent_node_hash + self.root_hash = node_hash + + # + # Utils + # + def _hash_and_save(self, node): + """ + Saves a node into the database and returns its hash + """ + + node_hash = keccak(node) + self.db[node_hash] = node + return node_hash From 4d56dc6e9fb7415e7e4060829ae344621ab46425 Mon Sep 17 00:00:00 2001 From: NIC619 Date: Sat, 23 Jun 2018 12:05:43 +0800 Subject: [PATCH 03/11] Add exist/delete method in Sparse Merkle Tree --- trie/sparse_merkle_tree.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/trie/sparse_merkle_tree.py b/trie/sparse_merkle_tree.py index 06c128a4..0f8484ca 100644 --- a/trie/sparse_merkle_tree.py +++ b/trie/sparse_merkle_tree.py @@ -75,6 +75,21 @@ def set(self, key, value): node_hash = parent_node_hash self.root_hash = node_hash + def exists(self, key): + validate_is_bytes(key) + validate_length(key, 20) + + return self.get(key) is not None + + def delete(self, key): + """ + Equals to setting the value to None + """ + validate_is_bytes(key) + validate_length(key, 20) + + self.set(key, b'') + # # Utils # From 71a12679b57b7dfc9b3a90470785ce4dbe4a9bb7 Mon Sep 17 00:00:00 2001 From: NIC619 Date: Sat, 23 Jun 2018 12:05:57 +0800 Subject: [PATCH 04/11] Add dictionary APIs in Sparse Merkle Tree --- trie/sparse_merkle_tree.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/trie/sparse_merkle_tree.py b/trie/sparse_merkle_tree.py index 0f8484ca..46d662ad 100644 --- a/trie/sparse_merkle_tree.py +++ b/trie/sparse_merkle_tree.py @@ -101,3 +101,18 @@ def _hash_and_save(self, node): node_hash = keccak(node) self.db[node_hash] = node return node_hash + + # + # Dictionary API + # + def __getitem__(self, key): + return self.get(key) + + def __setitem__(self, key, value): + return self.set(key, value) + + def __delitem__(self, key): + return self.delete(key) + + def __contains__(self, key): + return self.exists(key) From 6b2d95e3051891dee23b5afa98988761878a4323 Mon Sep 17 00:00:00 2001 From: NIC619 Date: Sun, 24 Jun 2018 13:06:22 +0800 Subject: [PATCH 05/11] Add basic get/set in Sparse Merkle Tree Test --- tests/test_sparse_merkle_tree.py | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/test_sparse_merkle_tree.py diff --git a/tests/test_sparse_merkle_tree.py b/tests/test_sparse_merkle_tree.py new file mode 100644 index 00000000..53385909 --- /dev/null +++ b/tests/test_sparse_merkle_tree.py @@ -0,0 +1,37 @@ +import pytest + +from hypothesis import ( + given, + strategies as st, + settings, +) + +from eth_hash.auto import ( + keccak, +) + +from trie.sparse_merkle_tree import ( + SparseMerkleTree, +) +from trie.constants import ( + EMPTY_NODE_HASHES, +) + + +@given(k=st.lists(st.binary(min_size=20, max_size=20), min_size=100, max_size=100, unique=True), + v=st.lists(st.binary(min_size=1), min_size=100, max_size=100)) +@settings(max_examples=10) +def test_sparse_merkle_tree(k, v, chosen_numbers): + kv_pairs = list(zip(k, v)) + + # Test basic get/set + trie = SparseMerkleTree(db={}) + for k, v in kv_pairs: + assert not trie.exists(k) + trie.set(k, v) + for k, v in kv_pairs: + assert trie.get(k) == v + trie.delete(k) + for k, _ in kv_pairs: + assert not trie.exists(k) + assert trie.root_hash == keccak(EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0]) From 029b1a9791b064f171dae292c399083f1bffbf95 Mon Sep 17 00:00:00 2001 From: NIC619 Date: Sun, 24 Jun 2018 13:40:59 +0800 Subject: [PATCH 06/11] Add update in Sparse Merkle Tree Test --- tests/test_sparse_merkle_tree.py | 39 ++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/tests/test_sparse_merkle_tree.py b/tests/test_sparse_merkle_tree.py index 53385909..c6e5e61f 100644 --- a/tests/test_sparse_merkle_tree.py +++ b/tests/test_sparse_merkle_tree.py @@ -19,9 +19,15 @@ @given(k=st.lists(st.binary(min_size=20, max_size=20), min_size=100, max_size=100, unique=True), - v=st.lists(st.binary(min_size=1), min_size=100, max_size=100)) + v=st.lists(st.binary(min_size=1), min_size=100, max_size=100), + chosen_numbers=st.lists( + st.integers(min_value=1, max_value=99), + min_size=50, + max_size=100, + unique=True), + random=st.randoms()) @settings(max_examples=10) -def test_sparse_merkle_tree(k, v, chosen_numbers): +def test_sparse_merkle_tree(k, v, chosen_numbers, random): kv_pairs = list(zip(k, v)) # Test basic get/set @@ -35,3 +41,32 @@ def test_sparse_merkle_tree(k, v, chosen_numbers): for k, _ in kv_pairs: assert not trie.exists(k) assert trie.root_hash == keccak(EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0]) + + # Test single update + random.shuffle(kv_pairs) + for k, v in kv_pairs: + trie.set(k, v) + prior_to_update_root = trie.root_hash + for i in chosen_numbers: + # Update + trie.set(kv_pairs[i][0], i.to_bytes(i, byteorder='big')) + assert trie.get(kv_pairs[i][0]) == i.to_bytes(i, byteorder='big') + assert trie.root_hash != prior_to_update_root + # Un-update + trie.set(kv_pairs[i][0], kv_pairs[i][1]) + assert trie.root_hash == prior_to_update_root + + # Test batch update with different update order + # First batch update + for i in chosen_numbers: + trie.set(kv_pairs[i][0], i.to_bytes(i, byteorder='big')) + batch_updated_root = trie.root_hash + # Un-update + for i in chosen_numbers: + trie.set(kv_pairs[i][0], kv_pairs[i][1]) + assert trie.root_hash == prior_to_update_root + # Second batch update + random.shuffle(chosen_numbers) + for i in chosen_numbers: + trie.set(kv_pairs[i][0], i.to_bytes(i, byteorder='big')) + assert trie.root_hash == batch_updated_root From 6626bc85e29bb54c4f307309ddf7eb7bacc67c7b Mon Sep 17 00:00:00 2001 From: NIC619 Date: Mon, 25 Jun 2018 00:14:03 +0800 Subject: [PATCH 07/11] Use recursive call instead of for loop in set --- trie/sparse_merkle_tree.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/trie/sparse_merkle_tree.py b/trie/sparse_merkle_tree.py index 46d662ad..9cc1d3a7 100644 --- a/trie/sparse_merkle_tree.py +++ b/trie/sparse_merkle_tree.py @@ -15,6 +15,7 @@ # sanity check assert EMPTY_LEAF_NODE_HASH == keccak(b'') + class SparseMerkleTree: def __init__(self, db): self.db = db @@ -49,31 +50,20 @@ def set(self, key, value): validate_length(key, 20) validate_is_bytes(value) - first_target_bit = 1 << 159 path = int.from_bytes(key, byteorder='big') - node_hash = self.root_hash - sibling_node_hashes = [] - # Record the sibling nodes along the way - for i in range(160): - if path & first_target_bit: - sibling_node_hashes.append(self.db[node_hash][:32]) - node_hash = self.db[node_hash][32:] - else: - sibling_node_hashes.append(self.db[node_hash][32:]) - node_hash = self.db[node_hash][:32] - first_target_bit >>= 1 + self.root_hash = self._set(value, path, 0, self.root_hash) + return - second_target_bit = 1 - node_hash = self._hash_and_save(value) - for i in range(160): - sibling_node_hash = sibling_node_hashes.pop() - if (path & second_target_bit): - parent_node_hash = self._hash_and_save(sibling_node_hash + node_hash) + def _set(self, value, path, depth, node_hash): + if depth == 160: + return self._hash_and_save(value) + else: + node = self.db[node_hash] + target_bit = 1 << (159 - depth) + if (path & target_bit): + return self._hash_and_save(node[:32] + self._set(value, path, depth+1, node[32:])) else: - parent_node_hash = self._hash_and_save(node_hash + sibling_node_hash) - second_target_bit <<= 1 - node_hash = parent_node_hash - self.root_hash = node_hash + return self._hash_and_save(self._set(value, path, depth+1, node[:32]) + node[32:]) def exists(self, key): validate_is_bytes(key) From 44c91d7f57a8ec0b2952dc3608c26281afa41a0c Mon Sep 17 00:00:00 2001 From: NIC619 Date: Tue, 17 Jul 2018 15:37:33 +0800 Subject: [PATCH 08/11] Fix linting error --- tests/test_sparse_merkle_tree.py | 2 -- trie/constants.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_sparse_merkle_tree.py b/tests/test_sparse_merkle_tree.py index c6e5e61f..83b28a4c 100644 --- a/tests/test_sparse_merkle_tree.py +++ b/tests/test_sparse_merkle_tree.py @@ -1,5 +1,3 @@ -import pytest - from hypothesis import ( given, strategies as st, diff --git a/trie/constants.py b/trie/constants.py index 93f5140e..dd1571f5 100644 --- a/trie/constants.py +++ b/trie/constants.py @@ -1,3 +1,5 @@ +from eth_hash.auto import keccak + BLANK_NODE = b'' # keccak(b'') BLANK_HASH = b"\xc5\xd2F\x01\x86\xf7#<\x92~}\xb2\xdc\xc7\x03\xc0\xe5\x00\xb6S\xca\x82';{\xfa\xd8\x04]\x85\xa4p" # noqa: E501 @@ -36,7 +38,6 @@ BYTE_0 = bytes([0]) # Constants for Sparse Merkle Tree -from eth_hash.auto import keccak EMPTY_LEAF_NODE_HASH = BLANK_HASH EMPTY_NODE_HASHES = [EMPTY_LEAF_NODE_HASH] for _ in range(159): From 13c0fc81411e5b4e76ee10ad07880c1522ffb92d Mon Sep 17 00:00:00 2001 From: NIC619 Date: Wed, 18 Jul 2018 15:04:40 +0800 Subject: [PATCH 09/11] Add TREE_HEIGHT constant --- trie/constants.py | 3 ++- trie/sparse_merkle_tree.py | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/trie/constants.py b/trie/constants.py index dd1571f5..f563c1ed 100644 --- a/trie/constants.py +++ b/trie/constants.py @@ -38,7 +38,8 @@ BYTE_0 = bytes([0]) # Constants for Sparse Merkle Tree +TREE_HEIGHT = 160 EMPTY_LEAF_NODE_HASH = BLANK_HASH EMPTY_NODE_HASHES = [EMPTY_LEAF_NODE_HASH] -for _ in range(159): +for _ in range(TREE_HEIGHT - 1): EMPTY_NODE_HASHES.insert(0, keccak(EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0])) diff --git a/trie/sparse_merkle_tree.py b/trie/sparse_merkle_tree.py index 9cc1d3a7..f7afbce3 100644 --- a/trie/sparse_merkle_tree.py +++ b/trie/sparse_merkle_tree.py @@ -3,6 +3,7 @@ ) from trie.constants import ( + TREE_HEIGHT, EMPTY_LEAF_NODE_HASH, EMPTY_NODE_HASHES, ) @@ -22,7 +23,7 @@ def __init__(self, db): # Initialize an empty tree with one branch self.root_hash = keccak(EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0]) self.db[self.root_hash] = EMPTY_NODE_HASHES[0] + EMPTY_NODE_HASHES[0] - for i in range(159): + for i in range(TREE_HEIGHT - 1): self.db[EMPTY_NODE_HASHES[i]] = EMPTY_NODE_HASHES[i+1] + EMPTY_NODE_HASHES[i+1] self.db[EMPTY_LEAF_NODE_HASH] = b'' @@ -30,10 +31,10 @@ def get(self, key): validate_is_bytes(key) validate_length(key, 20) - target_bit = 1 << 159 + target_bit = 1 << TREE_HEIGHT - 1 path = int.from_bytes(key, byteorder='big') node_hash = self.root_hash - for i in range(160): + for i in range(TREE_HEIGHT): if path & target_bit: node_hash = self.db[node_hash][32:] else: @@ -55,11 +56,11 @@ def set(self, key, value): return def _set(self, value, path, depth, node_hash): - if depth == 160: + if depth == TREE_HEIGHT: return self._hash_and_save(value) else: node = self.db[node_hash] - target_bit = 1 << (159 - depth) + target_bit = 1 << (TREE_HEIGHT - depth - 1) if (path & target_bit): return self._hash_and_save(node[:32] + self._set(value, path, depth+1, node[32:])) else: From fa8cd805057d0317b297a109a6dbad2a99657798 Mon Sep 17 00:00:00 2001 From: NIC619 Date: Wed, 18 Jul 2018 15:22:32 +0800 Subject: [PATCH 10/11] Raise KeyError if key does not exist --- trie/sparse_merkle_tree.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/trie/sparse_merkle_tree.py b/trie/sparse_merkle_tree.py index f7afbce3..ef7a22da 100644 --- a/trie/sparse_merkle_tree.py +++ b/trie/sparse_merkle_tree.py @@ -7,6 +7,9 @@ EMPTY_LEAF_NODE_HASH, EMPTY_NODE_HASHES, ) +from trie.exceptions import ( + InvalidKeyError, +) from trie.validation import ( validate_is_bytes, validate_length, @@ -41,10 +44,9 @@ def get(self, key): node_hash = self.db[node_hash][:32] target_bit >>= 1 - if self.db[node_hash] is b'': - return None - else: - return self.db[node_hash] + if node_hash == EMPTY_LEAF_NODE_HASH: + raise KeyError("Key does not exist") + return self.db[node_hash] def set(self, key, value): validate_is_bytes(key) @@ -70,7 +72,11 @@ def exists(self, key): validate_is_bytes(key) validate_length(key, 20) - return self.get(key) is not None + try: + self.get(key) + return True + except KeyError: + return False def delete(self, key): """ From fdbb57d461643cd1dcea385406923774cc2c522a Mon Sep 17 00:00:00 2001 From: NIC619 Date: Wed, 18 Jul 2018 15:42:44 +0800 Subject: [PATCH 11/11] Prevent updating with the same value and shuffle the update order --- tests/test_sparse_merkle_tree.py | 7 +++++++ trie/sparse_merkle_tree.py | 3 --- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_sparse_merkle_tree.py b/tests/test_sparse_merkle_tree.py index 83b28a4c..fcea183f 100644 --- a/tests/test_sparse_merkle_tree.py +++ b/tests/test_sparse_merkle_tree.py @@ -33,6 +33,7 @@ def test_sparse_merkle_tree(k, v, chosen_numbers, random): for k, v in kv_pairs: assert not trie.exists(k) trie.set(k, v) + prev_root = trie.root_hash for k, v in kv_pairs: assert trie.get(k) == v trie.delete(k) @@ -44,8 +45,13 @@ def test_sparse_merkle_tree(k, v, chosen_numbers, random): random.shuffle(kv_pairs) for k, v in kv_pairs: trie.set(k, v) + # Check trie root remains the same even in different insert order + assert trie.root_hash == prev_root prior_to_update_root = trie.root_hash for i in chosen_numbers: + # If new value is the same as current value, skip the update + if i.to_bytes(i, byteorder='big') == trie.get(kv_pairs[i][0]): + continue # Update trie.set(kv_pairs[i][0], i.to_bytes(i, byteorder='big')) assert trie.get(kv_pairs[i][0]) == i.to_bytes(i, byteorder='big') @@ -60,6 +66,7 @@ def test_sparse_merkle_tree(k, v, chosen_numbers, random): trie.set(kv_pairs[i][0], i.to_bytes(i, byteorder='big')) batch_updated_root = trie.root_hash # Un-update + random.shuffle(chosen_numbers) for i in chosen_numbers: trie.set(kv_pairs[i][0], kv_pairs[i][1]) assert trie.root_hash == prior_to_update_root diff --git a/trie/sparse_merkle_tree.py b/trie/sparse_merkle_tree.py index ef7a22da..c5943f74 100644 --- a/trie/sparse_merkle_tree.py +++ b/trie/sparse_merkle_tree.py @@ -7,9 +7,6 @@ EMPTY_LEAF_NODE_HASH, EMPTY_NODE_HASHES, ) -from trie.exceptions import ( - InvalidKeyError, -) from trie.validation import ( validate_is_bytes, validate_length,