Add a --use-db-cache argument to edb test (#4563)
Add a flag to tell `edb test` to attempt to maintain a cache of
initialized test databases.

This is unsound, of course--but no more unsound than
EDGEDB_DEBUG_BOOTSTRAP_CACHE_YOLO=1 is.

On my machine, this cuts bootstrap time for my "fasttests" collection
of tests from 50s to 5s.

The change is most noticeable when running single tests: the time
to run a single select test drops from 27s to 8s.
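
As a usage sketch (the test file path here is illustrative, not taken
from this commit): `edb test --use-db-cache tests/test_edgeql_select.py`.
The first run bootstraps the test databases and writes the cache;
subsequent runs restore from it.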
msullivan committed Oct 21, 2022
1 parent 82a3312 commit 541e884
Showing 3 changed files with 86 additions and 21 deletions.
edb/testbase/server.py (18 additions & 7 deletions)
@@ -1378,14 +1378,16 @@ def test_cases_use_server(cases: Iterable[unittest.TestCase]) -> bool:
     return True


-async def setup_test_cases(cases, conn, num_jobs, verbose=False):
+async def setup_test_cases(
+        cases, conn, num_jobs, try_cached_db=False, verbose=False):
     setup = get_test_cases_setup(cases)

     stats = []
     if num_jobs == 1:
         # Special case for --jobs=1
         for _case, dbname, setup_script in setup:
-            await _setup_database(dbname, setup_script, conn, stats)
+            await _setup_database(
+                dbname, setup_script, conn, stats, try_cached_db)
             if verbose:
                 print(f'  -> {dbname}: OK', flush=True)
     else:
@@ -1404,11 +1406,13 @@ async def controller(coro, dbname, *args):

             for _case, dbname, setup_script in setup:
                 g.create_task(controller(
-                    _setup_database, dbname, setup_script, conn, stats))
+                    _setup_database, dbname, setup_script, conn, stats,
+                    try_cached_db))
     return stats


-async def _setup_database(dbname, setup_script, conn_args, stats):
+async def _setup_database(
+        dbname, setup_script, conn_args, stats, try_cached_db):
     start_time = time.monotonic()
     default_args = {
         'user': edgedb_defines.EDGEDB_SUPERUSER,
@@ -1434,7 +1438,14 @@ async def _setup_database(dbname, setup_script, conn_args, stats):
         )
     except edgedb.DuplicateDatabaseDefinitionError:
         # Eh, that's fine
-        pass
+        # And, if we are trying to use a cache of the database, assume
+        # the db is populated and return.
+        if try_cached_db:
+            elapsed = time.monotonic() - start_time
+            stats.append(
+                ('setup::' + dbname,
+                 {'running-time': elapsed, 'cached': True}))
+            return
     except Exception as ex:
         raise RuntimeError(
             f'exception during creation of {dbname!r} test DB: '
@@ -1459,8 +1470,8 @@ async def _setup_database(dbname, setup_script, conn_args, stats):
         await dbconn.aclose()

     elapsed = time.monotonic() - start_time
-    stats.append(('setup::' + dbname, {'running-time': elapsed}))
-    return dbname
+    stats.append(
+        ('setup::' + dbname, {'running-time': elapsed, 'cached': False}))


 _lock_cnt = 0
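
To make the new stats payload concrete, here is a minimal sketch (not
part of the commit; the database names are invented) of the check the
runner performs on it:

    # Each entry is ('setup::<dbname>', {'running-time': float,
    # 'cached': bool}); any non-cached entry means at least one test
    # database had to be populated from scratch.
    stats = [
        ('setup::select', {'running-time': 0.4, 'cached': True}),
        ('setup::ddl', {'running-time': 6.1, 'cached': False}),
    ]
    needs_cache_refresh = any(not info['cached'] for _, info in stats)
    print(needs_cache_refresh)  # True: the runner re-archives the data dir
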
edb/tools/test/__init__.py (8 additions & 3 deletions)
@@ -95,11 +95,14 @@
 @click.option('--backend-dsn', type=str,
               help='Use the specified backend cluster instead of starting a '
                    'temporary local one.')
+@click.option('--use-db-cache', is_flag=True,
+              help='Attempt to use a cache of the test databases (unsound!)')
 @click.option('--data-dir', type=str,
               help='Use a specified data dir')
 def test(*, files, jobs, shard, include, exclude, verbose, quiet, debug,
          output_format, warnings, failfast, shuffle, cov, repeat,
-         running_times_log_file, list_tests, backend_dsn, data_dir):
+         running_times_log_file, list_tests, backend_dsn, use_db_cache,
+         data_dir):
     """Run EdgeDB test suite.

     Discovers and runs tests in the specified files or directories.
@@ -176,6 +179,7 @@ def test(*, files, jobs, shard, include, exclude, verbose, quiet, debug,
         running_times_log_file=running_times_log_file,
         list_tests=list_tests,
         backend_dsn=backend_dsn,
+        try_cached_db=use_db_cache,
         data_dir=data_dir,
     )

@@ -252,7 +256,8 @@ def _coverage_wrapper(paths):

 def _run(*, include, exclude, verbosity, files, jobs, output_format,
          warnings, failfast, shuffle, repeat, selected_shard, total_shards,
-         running_times_log_file, list_tests, backend_dsn, data_dir):
+         running_times_log_file, list_tests, backend_dsn, try_cached_db,
+         data_dir):
     suite = unittest.TestSuite()

     total = 0
@@ -312,7 +317,7 @@ def _update_progress(n, unfiltered_n):
         verbosity=verbosity, output_format=output_format,
         warnings=warnings, num_workers=jobs,
         failfast=failfast, shuffle=shuffle, backend_dsn=backend_dsn,
-        data_dir=data_dir)
+        try_cached_db=try_cached_db, data_dir=data_dir)

     result = test_runner.run(
         suite, selected_shard, total_shards, running_times_log_file,
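
For readers unfamiliar with click, here is a minimal standalone sketch
(simplified; not the real `edb` CLI) of the plumbing the diff above
adds: a boolean flag whose parsed value is handed off under a
different keyword name:

    import click

    @click.command()
    @click.option('--use-db-cache', is_flag=True,
                  help='Attempt to use a cache of the test databases')
    def test(use_db_cache):
        # click parses the flag into a bool; it is renamed on hand-off.
        _run(try_cached_db=use_db_cache)

    def _run(*, try_cached_db):
        print(f'try_cached_db={try_cached_db}')

    if __name__ == '__main__':
        test()
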
edb/tools/test/runner.py (60 additions & 11 deletions)
@@ -37,6 +37,7 @@
 import pathlib
 import random
 import re
+import subprocess
 import sys
 import tempfile
 import threading
@@ -796,7 +797,7 @@ class ParallelTextTestRunner:
     def __init__(self, *, stream=None, num_workers=1, verbosity=1,
                  output_format=OutputFormat.auto, warnings=True,
                  failfast=False, shuffle=False, backend_dsn=None,
-                 data_dir=None):
+                 data_dir=None, try_cached_db=False):
         self.stream = stream if stream is not None else sys.stderr
         self.num_workers = num_workers
         self.verbosity = verbosity
@@ -806,6 +807,7 @@ def __init__(self, *, stream=None, num_workers=1, verbosity=1,
         self.output_format = output_format
         self.backend_dsn = backend_dsn
         self.data_dir = data_dir
+        self.try_cached_db = try_cached_db

     def run(self, test, selected_shard, total_shards, running_times_log_file):
         session_start = time.monotonic()
@@ -891,27 +893,74 @@ async def _setup():
             nonlocal cluster
             nonlocal conn

+            cache_file = (
+                devmode.get_dev_mode_cache_dir() / 'test_dbs.tar')
+
+            data_dir = self.data_dir
+
+            if self.try_cached_db and cache_file.is_file():
+                if self.verbosity >= 1:
+                    self._echo(
+                        f'(using DB cache from {cache_file}) ',
+                        fg='white',
+                        nl=False,
+                    )
+
+                data_dir = tempfile.mkdtemp(prefix="edb-test-c-")
+
+                # We shell out to tar with subprocess instead of using
+                # tarfile because it is quite a bit faster.
+                subprocess.check_call(
+                    ('tar', 'xf', cache_file, '--strip-components=1'),
+                    cwd=data_dir,
+                )
+
             cluster = await tb.init_cluster(
                 backend_dsn=self.backend_dsn,
                 cleanup_atexit=False,
-                data_dir=self.data_dir,
+                data_dir=data_dir,
             )

             if self.verbosity > 1:
                 self._echo(' OK')

             conn = cluster.get_connect_args()

-            if cluster.has_create_database():
-                return await tb.setup_test_cases(
-                    cases,
-                    conn,
-                    self.num_workers,
-                    verbose=self.verbosity > 1,
-                )
-            else:
-                return []
+            if not cluster.has_create_database():
+                return []
+
+            stats = await tb.setup_test_cases(
+                cases,
+                conn,
+                self.num_workers,
+                verbose=self.verbosity > 1,
+                try_cached_db=self.try_cached_db,
+            )
+            if self.try_cached_db and any(
+                not x[1]['cached'] for x in stats
+            ):
+                # We stop the cluster before making a cache of
+                # the data directory. This isn't strictly
+                # necessary, but it speeds up startup when
+                # restoring a cached directory, since postgres
+                # needs to go through recovery if the shutdown
+                # wasn't clean.
+                cluster.stop()
+                if self.verbosity > 1:
+                    self._echo(
+                        f'\n -> Writing DB cache to {cache_file} ...',
+                        fg='white',
+                        nl=False,
+                    )
+                subprocess.check_output(
+                    ('tar', 'cf', cache_file, '.'),
+                    cwd=cluster._data_dir,
+                    stderr=subprocess.STDOUT,
+                )
+                await cluster.start(port=conn['port'])
+
+            return stats

         setup_stats = asyncio.run(_setup())

         if cluster.has_create_database():
@@ -988,7 +1037,7 @@ async def _setup():
         if setup:
             self._echo()
             self._echo('Shutting down test cluster... ', nl=False)
-            tb._shutdown_cluster(cluster, destroy=True)
+            tb._shutdown_cluster(cluster, destroy=self.data_dir is None)
             self._echo('OK.')

         if result is not None:
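
The cache restore/write cycle in `_setup()` above reduces to the
following standalone sketch (function names are illustrative; error
handling and cluster start/stop omitted):

    import subprocess
    import tempfile

    def restore_cache(cache_file):
        # Unpack the cached data directory into a fresh temporary dir.
        data_dir = tempfile.mkdtemp(prefix='edb-test-c-')
        # Shelling out to tar is quite a bit faster than Python's
        # tarfile module.
        subprocess.check_call(
            ('tar', 'xf', cache_file, '--strip-components=1'),
            cwd=data_dir,
        )
        return data_dir

    def write_cache(cache_file, data_dir):
        # Archive a cleanly stopped data directory for later reuse;
        # stopping the server first lets postgres skip crash recovery
        # when the cache is restored.
        subprocess.check_output(
            ('tar', 'cf', cache_file, '.'),
            cwd=data_dir,
            stderr=subprocess.STDOUT,
        )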
