Add a --use-db-cache argument to edb test (#4563)
Add a flag to tell `edb test` to attempt to maintain a cache of
initialized test databases.

This is unsound, of course--but no more unsound than
EDGEDB_DEBUG_BOOTSTRAP_CACHE_YOLO=1 is.

On my machine, this cuts bootstrap time for my "fasttests" collection
of tests from 50s to 5s.

The change is most noticeable when running single tests: the time
to run a single select test drops from 27s to 8s.
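
As a usage sketch (the test file path here is illustrative, not taken
from this commit): `edb test --use-db-cache tests/test_edgeql_select.py`.
The first run bootstraps the test databases and writes the cache;
subsequent runs restore from it.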
msullivan committed Oct 21, 2022
1 parent 82a3312 commit 541e884
Showing 3 changed files with 86 additions and 21 deletions.
edb/testbase/server.py (18 additions & 7 deletions)
@@ -1378,14 +1378,16 @@ def test_cases_use_server(cases: Iterable[unittest.TestCase]) -> bool:
     return True


-async def setup_test_cases(cases, conn, num_jobs, verbose=False):
+async def setup_test_cases(
+        cases, conn, num_jobs, try_cached_db=False, verbose=False):
     setup = get_test_cases_setup(cases)

     stats = []
     if num_jobs == 1:
         # Special case for --jobs=1
         for _case, dbname, setup_script in setup:
-            await _setup_database(dbname, setup_script, conn, stats)
+            await _setup_database(
+                dbname, setup_script, conn, stats, try_cached_db)
             if verbose:
                 print(f'  -> {dbname}: OK', flush=True)
     else:
@@ -1404,11 +1406,13 @@ async def controller(coro, dbname, *args):

             for _case, dbname, setup_script in setup:
                 g.create_task(controller(
-                    _setup_database, dbname, setup_script, conn, stats))
+                    _setup_database, dbname, setup_script, conn, stats,
+                    try_cached_db))
     return stats


-async def _setup_database(dbname, setup_script, conn_args, stats):
+async def _setup_database(
+        dbname, setup_script, conn_args, stats, try_cached_db):
     start_time = time.monotonic()
     default_args = {
         'user': edgedb_defines.EDGEDB_SUPERUSER,
@@ -1434,7 +1438,14 @@ async def _setup_database(dbname, setup_script, conn_args, stats):
         )
     except edgedb.DuplicateDatabaseDefinitionError:
         # Eh, that's fine
-        pass
+        # And, if we are trying to use a cache of the database, assume
+        # the db is populated and return.
+        if try_cached_db:
+            elapsed = time.monotonic() - start_time
+            stats.append(
+                ('setup::' + dbname,
+                 {'running-time': elapsed, 'cached': True}))
+            return
     except Exception as ex:
         raise RuntimeError(
             f'exception during creation of {dbname!r} test DB: '
@@ -1459,8 +1470,8 @@ async def _setup_database(dbname, setup_script, conn_args, stats):
         await dbconn.aclose()

     elapsed = time.monotonic() - start_time
-    stats.append(('setup::' + dbname, {'running-time': elapsed}))
-    return dbname
+    stats.append(
+        ('setup::' + dbname, {'running-time': elapsed, 'cached': False}))


 _lock_cnt = 0
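
To make the new stats payload concrete, here is a minimal sketch (not
part of the commit; the database names are invented) of the check the
runner performs on it:

    # Each entry is ('setup::<dbname>', {'running-time': float,
    # 'cached': bool}); any non-cached entry means at least one test
    # database had to be populated from scratch.
    stats = [
        ('setup::select', {'running-time': 0.4, 'cached': True}),
        ('setup::ddl', {'running-time': 6.1, 'cached': False}),
    ]
    needs_cache_refresh = any(not info['cached'] for _, info in stats)
    print(needs_cache_refresh)  # True: the runner re-archives the data dir
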
edb/tools/test/__init__.py (8 additions & 3 deletions)
@@ -95,11 +95,14 @@
 @click.option('--backend-dsn', type=str,
               help='Use the specified backend cluster instead of starting a '
                    'temporary local one.')
+@click.option('--use-db-cache', is_flag=True,
+              help='Attempt to use a cache of the test databases (unsound!)')
 @click.option('--data-dir', type=str,
               help='Use a specified data dir')
 def test(*, files, jobs, shard, include, exclude, verbose, quiet, debug,
          output_format, warnings, failfast, shuffle, cov, repeat,
-         running_times_log_file, list_tests, backend_dsn, data_dir):
+         running_times_log_file, list_tests, backend_dsn, use_db_cache,
+         data_dir):
     """Run EdgeDB test suite.

     Discovers and runs tests in the specified files or directories.
@@ -176,6 +179,7 @@ def test(*, files, jobs, shard, include, exclude, verbose, quiet, debug,
         running_times_log_file=running_times_log_file,
         list_tests=list_tests,
         backend_dsn=backend_dsn,
+        try_cached_db=use_db_cache,
         data_dir=data_dir,
     )

@@ -252,7 +256,8 @@ def _coverage_wrapper(paths):

 def _run(*, include, exclude, verbosity, files, jobs, output_format,
          warnings, failfast, shuffle, repeat, selected_shard, total_shards,
-         running_times_log_file, list_tests, backend_dsn, data_dir):
+         running_times_log_file, list_tests, backend_dsn, try_cached_db,
+         data_dir):
     suite = unittest.TestSuite()

     total = 0
@@ -312,7 +317,7 @@ def _update_progress(n, unfiltered_n):
         verbosity=verbosity, output_format=output_format,
         warnings=warnings, num_workers=jobs,
         failfast=failfast, shuffle=shuffle, backend_dsn=backend_dsn,
-        data_dir=data_dir)
+        try_cached_db=try_cached_db, data_dir=data_dir)

     result = test_runner.run(
         suite, selected_shard, total_shards, running_times_log_file,
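
For readers unfamiliar with click, here is a minimal standalone sketch
(simplified; not the real `edb` CLI) of the plumbing the diff above
adds: a boolean flag whose parsed value is handed off under a
different keyword name:

    import click

    @click.command()
    @click.option('--use-db-cache', is_flag=True,
                  help='Attempt to use a cache of the test databases')
    def test(use_db_cache):
        # click parses the flag into a bool; it is renamed on hand-off.
        _run(try_cached_db=use_db_cache)

    def _run(*, try_cached_db):
        print(f'try_cached_db={try_cached_db}')

    if __name__ == '__main__':
        test()
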
edb/tools/test/runner.py (60 additions & 11 deletions)
@@ -37,6 +37,7 @@
 import pathlib
 import random
 import re
+import subprocess
 import sys
 import tempfile
 import threading
@@ -796,7 +797,7 @@ class ParallelTextTestRunner:
     def __init__(self, *, stream=None, num_workers=1, verbosity=1,
                  output_format=OutputFormat.auto, warnings=True,
                  failfast=False, shuffle=False, backend_dsn=None,
-                 data_dir=None):
+                 data_dir=None, try_cached_db=False):
         self.stream = stream if stream is not None else sys.stderr
         self.num_workers = num_workers
         self.verbosity = verbosity
@@ -806,6 +807,7 @@ def __init__(self, *, stream=None, num_workers=1, verbosity=1,
         self.output_format = output_format
         self.backend_dsn = backend_dsn
         self.data_dir = data_dir
+        self.try_cached_db = try_cached_db

     def run(self, test, selected_shard, total_shards, running_times_log_file):
         session_start = time.monotonic()
@@ -891,27 +893,74 @@ async def _setup():
             nonlocal cluster
             nonlocal conn

+            cache_file = (
+                devmode.get_dev_mode_cache_dir() / 'test_dbs.tar')
+
+            data_dir = self.data_dir
+
+            if self.try_cached_db and cache_file.is_file():
+                if self.verbosity >= 1:
+                    self._echo(
+                        f'(using DB cache from {cache_file}) ',
+                        fg='white',
+                        nl=False,
+                    )
+
+                data_dir = tempfile.mkdtemp(prefix="edb-test-c-")
+
+                # We shell out to tar with subprocess instead of using
+                # tarfile because it is quite a bit faster.
+                subprocess.check_call(
+                    ('tar', 'xf', cache_file, '--strip-components=1'),
+                    cwd=data_dir,
+                )
+
             cluster = await tb.init_cluster(
                 backend_dsn=self.backend_dsn,
                 cleanup_atexit=False,
-                data_dir=self.data_dir,
+                data_dir=data_dir,
             )

             if self.verbosity > 1:
                 self._echo(' OK')

             conn = cluster.get_connect_args()

-            if cluster.has_create_database():
-                return await tb.setup_test_cases(
-                    cases,
-                    conn,
-                    self.num_workers,
-                    verbose=self.verbosity > 1,
-                )
-            else:
-                return []
+            if not cluster.has_create_database():
+                return []
+
+            stats = await tb.setup_test_cases(
+                cases,
+                conn,
+                self.num_workers,
+                verbose=self.verbosity > 1,
+                try_cached_db=self.try_cached_db,
+            )
+            if self.try_cached_db and any(
+                not x[1]['cached'] for x in stats
+            ):
+                # We stop the cluster before making a cache of
+                # the data directory. This isn't strictly
+                # necessary, but it speeds up startup when
+                # restoring a cached directory, since postgres
+                # needs to go through recovery if the shutdown
+                # wasn't clean.
+                cluster.stop()
+                if self.verbosity > 1:
+                    self._echo(
+                        f'\n -> Writing DB cache to {cache_file} ...',
+                        fg='white',
+                        nl=False,
+                    )
+                subprocess.check_output(
+                    ('tar', 'cf', cache_file, '.'),
+                    cwd=cluster._data_dir,
+                    stderr=subprocess.STDOUT,
+                )
+                await cluster.start(port=conn['port'])
+
+            return stats

         setup_stats = asyncio.run(_setup())

         if cluster.has_create_database():
@@ -988,7 +1037,7 @@ async def _setup():
         if setup:
             self._echo()
             self._echo('Shutting down test cluster... ', nl=False)
-            tb._shutdown_cluster(cluster, destroy=True)
+            tb._shutdown_cluster(cluster, destroy=self.data_dir is None)
             self._echo('OK.')

         if result is not None:
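
The cache restore/write cycle in `_setup()` above reduces to the
following standalone sketch (function names are illustrative; error
handling and cluster start/stop omitted):

    import subprocess
    import tempfile

    def restore_cache(cache_file):
        # Unpack the cached data directory into a fresh temporary dir.
        data_dir = tempfile.mkdtemp(prefix='edb-test-c-')
        # Shelling out to tar is quite a bit faster than Python's
        # tarfile module.
        subprocess.check_call(
            ('tar', 'xf', cache_file, '--strip-components=1'),
            cwd=data_dir,
        )
        return data_dir

    def write_cache(cache_file, data_dir):
        # Archive a cleanly stopped data directory for later reuse;
        # stopping the server first lets postgres skip crash recovery
        # when the cache is restored.
        subprocess.check_output(
            ('tar', 'cf', cache_file, '.'),
            cwd=data_dir,
            stderr=subprocess.STDOUT,
        )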
