Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Database for tracking key accesses #1918

Merged
merged 5 commits into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ coverage:
open htmlcov/index.html

build-docs:
pip install -e .[doc]
cd docs/; sphinx-build -W -T -E . _build/html
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the idea here was that one would be able to run make build-docs without the assumption that docs dependencies were already installed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, if it was really fast, I probably wouldn't object, but the pip install actually takes a while. So if you run make build-docs over and over, it gets old really fast. Also, a bunch of our other make targets assume that you have the appropriate installed tools (wheel, setuptools, towncrier, etc). We could probably document them all better, though. 👍


doctest:
Expand Down
2 changes: 1 addition & 1 deletion docs/guides/understanding_the_mining_process.rst
Original file line number Diff line number Diff line change
Expand Up @@ -385,4 +385,4 @@ zero value transfer transaction.
... )

>>> chain.mine_block(mix_hash=mix_hash, nonce=nonce)
<ByzantiumBlock(#Block #1)>
<ByzantiumBlock(#Block #1-0x41f6..2913)>
13 changes: 12 additions & 1 deletion eth/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,13 +403,24 @@ def delete(self, key: bytes) -> None:
...


class AtomicWriteBatchAPI(DatabaseAPI):
    """
    The readable/writeable object returned by an atomic database when we start building
    a batch of writes to commit.

    Reads to this database will observe writes written during batching,
    but the writes will not actually persist until this object is committed.
    """
    # No members of its own: the class exists purely as a marker type so that
    # ``atomic_batch()`` implementations can declare a more precise return type
    # than plain ``DatabaseAPI``.  The redundant ``pass`` after the docstring
    # was removed -- a docstring already makes the class body non-empty.


class AtomicDatabaseAPI(DatabaseAPI):
"""
Like ``BatchDB``, but immediately write out changes if they are
not in an ``atomic_batch()`` context.
"""
@abstractmethod
def atomic_batch(self) -> ContextManager[DatabaseAPI]:
def atomic_batch(self) -> ContextManager[AtomicWriteBatchAPI]:
"""
Return a :class:`~typing.ContextManager` to write an atomic batch to the database.
"""
Expand Down
113 changes: 113 additions & 0 deletions eth/db/accesslog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from contextlib import contextmanager
import logging
from typing import (
Iterator,
FrozenSet,
Set,
)

from eth.abc import (
AtomicWriteBatchAPI,
AtomicDatabaseAPI,
DatabaseAPI,
)
from eth.db.backends.base import (
BaseDB,
)
from eth.db.atomic import (
BaseAtomicDB,
)


class KeyAccessLoggerDB(BaseDB):
    """
    Wraps around a database, and tracks all the keys that were read since initialization.

    Useful for debugging/analysis: after running a workload, the full set of keys it
    touched can be inspected via :attr:`keys_read`.  Writes and deletes are passed
    through untracked; only reads (lookups and membership checks) are logged.
    """

    logger = logging.getLogger("eth.db.KeyAccessLoggerDB")

    def __init__(self, wrapped_db: DatabaseAPI, log_missing_keys: bool = True) -> None:
        """
        :param wrapped_db: the underlying database that actually stores the data
        :param log_missing_keys: True if a key is added to :attr:`keys_read` even if the
            key/value does not exist in the database.
        """
        self.wrapped_db = wrapped_db
        self._keys_read: Set[bytes] = set()
        self._log_missing_keys = log_missing_keys

    @property
    def keys_read(self) -> FrozenSet[bytes]:
        # Return an immutable snapshot so callers can't mutate the tracked set
        # externally.  (Original comment said "list"; the structure is a set.)
        return frozenset(self._keys_read)

    def __getitem__(self, key: bytes) -> bytes:
        try:
            # Use the idiomatic subscript form rather than calling __getitem__
            # explicitly; behavior is identical.
            result = self.wrapped_db[key]
        except KeyError:
            # A failed lookup still counts as a read, if configured to do so.
            if self._log_missing_keys:
                self._keys_read.add(key)
            raise
        else:
            self._keys_read.add(key)
            return result

    def __setitem__(self, key: bytes, value: bytes) -> None:
        # Writes are not tracked -- only reads.
        self.wrapped_db[key] = value

    def __delitem__(self, key: bytes) -> None:
        # Deletes are not tracked -- only reads.
        del self.wrapped_db[key]

    def _exists(self, key: bytes) -> bool:
        # Membership checks count as reads, too; a negative check is logged
        # only when log_missing_keys is enabled.
        does_exist = key in self.wrapped_db
        if does_exist or self._log_missing_keys:
            self._keys_read.add(key)
        return does_exist


class KeyAccessLoggerAtomicDB(BaseAtomicDB):
    """
    Wraps around an atomic database, and tracks all the keys that were read since
    initialization.

    Atomic twin of :class:`KeyAccessLoggerDB`: reads (lookups and membership checks)
    are recorded in :attr:`keys_read`; writes and deletes pass through untracked.
    """
    logger = logging.getLogger("eth.db.KeyAccessLoggerAtomicDB")

    def __init__(self, wrapped_db: AtomicDatabaseAPI, log_missing_keys: bool = True) -> None:
        """
        :param wrapped_db: the underlying atomic database that actually stores the data
        :param log_missing_keys: True if a key is added to :attr:`keys_read` even if the
            key/value does not exist in the database.
        """
        self.wrapped_db = wrapped_db
        self._keys_read: Set[bytes] = set()
        self._log_missing_keys = log_missing_keys

    @property
    def keys_read(self) -> FrozenSet[bytes]:
        # Return an immutable snapshot so callers can't mutate the tracked set
        # externally.  (Original comment said "list"; the structure is a set.)
        return frozenset(self._keys_read)

    def __getitem__(self, key: bytes) -> bytes:
        try:
            # Use the idiomatic subscript form rather than calling __getitem__
            # explicitly; behavior is identical.
            result = self.wrapped_db[key]
        except KeyError:
            # A failed lookup still counts as a read, if configured to do so.
            if self._log_missing_keys:
                self._keys_read.add(key)
            raise
        else:
            self._keys_read.add(key)
            return result

    def __setitem__(self, key: bytes, value: bytes) -> None:
        # Writes are not tracked -- only reads.
        self.wrapped_db[key] = value

    def __delitem__(self, key: bytes) -> None:
        # Deletes are not tracked -- only reads.
        del self.wrapped_db[key]

    def _exists(self, key: bytes) -> bool:
        # Membership checks count as reads, too; a negative check is logged
        # only when log_missing_keys is enabled.
        does_exist = key in self.wrapped_db
        if does_exist or self._log_missing_keys:
            self._keys_read.add(key)
        return does_exist

    @contextmanager
    def atomic_batch(self) -> Iterator[AtomicWriteBatchAPI]:
        # NOTE(review): the wrapped batch is yielded as-is, so reads made
        # through the batch object are NOT recorded in keys_read -- confirm
        # that is the intended scope of the tracking.
        with self.wrapped_db.atomic_batch() as readable_batch:
            yield readable_batch
9 changes: 5 additions & 4 deletions eth/db/atomic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
)

from eth.abc import (
AtomicWriteBatchAPI,
DatabaseAPI,
)

Expand All @@ -17,7 +18,7 @@
DBDiffTracker,
DiffMissingError,
)
from eth.db.backends.base import BaseDB, BaseAtomicDB
from eth.db.backends.base import BaseAtomicDB, BaseDB
from eth.db.backends.memory import MemoryDB


Expand Down Expand Up @@ -46,12 +47,12 @@ def _exists(self, key: bytes) -> bool:
return key in self.wrapped_db

@contextmanager
def atomic_batch(self) -> Iterator['AtomicDBWriteBatch']:
def atomic_batch(self) -> Iterator[AtomicWriteBatchAPI]:
with AtomicDBWriteBatch._commit_unless_raises(self) as readable_batch:
yield readable_batch


class AtomicDBWriteBatch(BaseDB):
class AtomicDBWriteBatch(BaseDB, AtomicWriteBatchAPI):
"""
This is returned by a BaseAtomicDB during an atomic_batch, to provide a temporary view
of the database, before commit.
Expand Down Expand Up @@ -112,7 +113,7 @@ def _exists(self, key: bytes) -> bool:

@classmethod
@contextmanager
def _commit_unless_raises(cls, write_target_db: DatabaseAPI) -> Iterator['AtomicDBWriteBatch']:
def _commit_unless_raises(cls, write_target_db: DatabaseAPI) -> Iterator[AtomicWriteBatchAPI]:
"""
Commit all writes inside the context, unless an exception was raised.

Expand Down
11 changes: 7 additions & 4 deletions eth/db/backends/level.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
import logging
from pathlib import Path
from typing import (
Generator,
Iterator,
TYPE_CHECKING,
)

from eth_utils import ValidationError

from eth.abc import DatabaseAPI
from eth.abc import (
AtomicWriteBatchAPI,
DatabaseAPI,
)
from eth.db.diff import (
DBDiffTracker,
DiffMissingError,
Expand Down Expand Up @@ -67,7 +70,7 @@ def __delitem__(self, key: bytes) -> None:
self.db.delete(key)

@contextmanager
def atomic_batch(self) -> Generator['LevelDBWriteBatch', None, None]:
def atomic_batch(self) -> Iterator[AtomicWriteBatchAPI]:
with self.db.write_batch(transaction=True) as atomic_batch:
readable_batch = LevelDBWriteBatch(self, atomic_batch)
try:
Expand All @@ -76,7 +79,7 @@ def atomic_batch(self) -> Generator['LevelDBWriteBatch', None, None]:
readable_batch.decommission()


class LevelDBWriteBatch(BaseDB):
class LevelDBWriteBatch(BaseDB, AtomicWriteBatchAPI):
"""
A native leveldb write batch does not permit reads on the in-progress data.
This class fills that gap, by tracking the in-progress diff, and adding
Expand Down
8 changes: 6 additions & 2 deletions eth/rlp/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
Type
)

from eth_utils import (
humanize_hash,
)

from eth._utils.datatypes import (
Configurable,
)

from eth.abc import (
BlockAPI,
SignedTransactionAPI,
Expand All @@ -29,4 +32,5 @@ def __repr__(self) -> str:
return f'<{self.__class__.__name__}(#{str(self)})>'

def __str__(self) -> str:
return f"Block #{self.number}"
clipped_hash = humanize_hash(self.hash)
return f"Block #{self.number}-0x{clipped_hash}"
14 changes: 7 additions & 7 deletions eth/tools/mining.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from eth.abc import (
BlockAPI,
VirtualMachineAPI,
)
from eth.consensus import (
pow,
)

from eth.rlp.blocks import (
BaseBlock,
)


class POWMiningMixin:
class POWMiningMixin(VirtualMachineAPI):
"""
A VM that does POW mining as well. Should be used only in tests, when we
need to programmatically populate a ChainDB.
"""
def finalize_block(self, block: BaseBlock) -> BaseBlock:
block = super().finalize_block(block) # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Always nice to get rid of type: ignore

def finalize_block(self, block: BlockAPI) -> BlockAPI:
block = super().finalize_block(block)
nonce, mix_hash = pow.mine_pow_nonce(
block.number, block.header.mining_hash, block.header.difficulty)
return block.copy(header=block.header.copy(nonce=nonce, mix_hash=mix_hash))
3 changes: 3 additions & 0 deletions newsfragments/1918.internal.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added :class:`~eth.db.accesslog.KeyAccessLoggerDB` and its atomic twin; faster ``make
validate-docs`` (but you have to remember to ``pip install -e .[doc]`` yourself); ``str(block)`` now
includes the first 3 bytes of the block hash.
69 changes: 69 additions & 0 deletions tests/database/test_accesslog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from hypothesis import (
given,
strategies as st,
)
import pytest

from eth.db.accesslog import (
KeyAccessLoggerDB,
KeyAccessLoggerAtomicDB,
)
from eth.db.backends.memory import MemoryDB


@given(st.lists(st.binary()))
@pytest.mark.parametrize('DB', (
    lambda: KeyAccessLoggerAtomicDB(MemoryDB()),
    lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=False),
    lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=True),
    lambda: KeyAccessLoggerDB(MemoryDB()),
    lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=False),
    lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=True),
))
def test_log_accesses(DB, keys):
    # Every key that gets written and then read back must appear in keys_read.
    tracker = DB()
    assert not tracker.keys_read

    for key in keys:
        # The stored value is irrelevant to the tracking behavior under test.
        tracker[key] = b'placeholder'
        assert tracker[key] == b'placeholder'

    assert set(keys).issubset(tracker.keys_read)


@pytest.mark.parametrize('DB', (
    lambda: KeyAccessLoggerAtomicDB(MemoryDB()),
    lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=True),
    lambda: KeyAccessLoggerDB(MemoryDB()),
    lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=True),
))
def test_logs_missing_keys(DB):
    # With log_missing_keys enabled (the default), reads of absent keys are tracked.
    tracker = DB()
    assert not tracker.keys_read

    # A membership check on an absent key is still logged.
    assert b'exist-test' not in tracker
    assert b'exist-test' in tracker.keys_read

    # A failed lookup raises KeyError, but the key is logged anyway.
    with pytest.raises(KeyError, match='get-test'):
        tracker[b'get-test']
    assert b'get-test' in tracker.keys_read

    assert len(tracker.keys_read) == 2


@pytest.mark.parametrize('DB', (
    lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=False),
    lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=False),
))
def test_dont_log_missing_keys(DB):
    # With log_missing_keys disabled, reads of absent keys leave no trace.
    tracker = DB()
    assert not tracker.keys_read

    # A negative membership check must not be logged.
    assert b'exist-test' not in tracker
    assert b'exist-test' not in tracker.keys_read

    # A failed lookup raises KeyError and must not be logged either.
    with pytest.raises(KeyError, match='get-test'):
        tracker[b'get-test']
    assert b'get-test' not in tracker.keys_read

    assert len(tracker.keys_read) == 0
19 changes: 18 additions & 1 deletion tests/database/test_database_api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import pytest
from eth.db.accesslog import (
KeyAccessLoggerAtomicDB,
KeyAccessLoggerDB,
)
from eth.db.backends.memory import MemoryDB
from eth.db.journal import JournalDB
from eth.db.batch import BatchDB
Expand All @@ -8,7 +12,15 @@
from eth.tools.db.base import DatabaseAPITestSuite


@pytest.fixture(params=[JournalDB, BatchDB, MemoryDB, AtomicDB, CacheDB])
@pytest.fixture(params=[
JournalDB,
BatchDB,
MemoryDB,
AtomicDB,
CacheDB,
KeyAccessLoggerAtomicDB,
KeyAccessLoggerDB,
])
def db(request):
base_db = MemoryDB()
if request.param is JournalDB:
Expand All @@ -23,6 +35,11 @@ def db(request):
yield batch
elif request.param is CacheDB:
yield CacheDB(base_db)
elif request.param is KeyAccessLoggerAtomicDB:
atomic_db = AtomicDB(base_db)
yield KeyAccessLoggerAtomicDB(atomic_db)
elif request.param is KeyAccessLoggerDB:
yield KeyAccessLoggerDB(base_db)
else:
raise Exception("Invariant")

Expand Down