diff --git a/lib/pbench/cli/server/report.py b/lib/pbench/cli/server/report.py index 5490a4cf96..8702dcfc0d 100644 --- a/lib/pbench/cli/server/report.py +++ b/lib/pbench/cli/server/report.py @@ -486,12 +486,12 @@ def report( try: config = config_setup(context) - logger = get_pbench_logger("report-generator", config) + logger = get_pbench_logger("pbench-report-generator", config) if any((all, archive, backup, cache)): cache_m = CacheManager(config, logger) verifier.status("starting discovery") watcher.update("discovering cache") - cache_m.full_discovery() + cache_m.full_discovery(search=False) watcher.update("processing reports") verifier.status("finished discovery") if all or archive: diff --git a/lib/pbench/cli/server/tree_manage.py b/lib/pbench/cli/server/tree_manage.py index 05b8925f8a..4c29ae905d 100644 --- a/lib/pbench/cli/server/tree_manage.py +++ b/lib/pbench/cli/server/tree_manage.py @@ -80,7 +80,7 @@ def tree_manage( logger = None try: config = config_setup(context) - logger = get_pbench_logger("cachemanager", config) + logger = get_pbench_logger("pbench-tree-manager", config) cache_m = CacheManager(config, logger) cache_m.full_discovery() if display: diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py index 1507c46d61..20f3c590f7 100644 --- a/lib/pbench/server/api/resources/datasets_contents.py +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -1,5 +1,4 @@ from http import HTTPStatus -from pathlib import Path from flask import current_app, jsonify from flask.wrappers import Request, Response @@ -22,8 +21,6 @@ BadDirpath, CacheExtractBadPath, CacheManager, - CacheObject, - CacheType, TarballNotFound, ) from pbench.server.database.models.datasets import Dataset @@ -65,100 +62,22 @@ def _get(self, params: ApiParams, req: Request, context: ApiContext) -> Response dataset: Dataset = params.uri["dataset"] target = params.uri.get("target") - path = Path("." if target in ("/", None) else target) + path = "." 
if target in ("/", None) else target + + prefix = current_app.server_config.rest_uri + origin = ( + f"{self._get_uri_base(req).host}{prefix}/datasets/{dataset.resource_id}" + ) cache_m = CacheManager(self.config, current_app.logger) try: - info = cache_m.find_entry(dataset.resource_id, path) + info = cache_m.get_contents(dataset.resource_id, path, origin) except (BadDirpath, CacheExtractBadPath, TarballNotFound) as e: raise APIAbort(HTTPStatus.NOT_FOUND, str(e)) except Exception as e: raise APIInternalError(f"Cache find error: {str(e)!r}") - prefix = current_app.server_config.rest_uri - origin = ( - f"{self._get_uri_base(req).host}{prefix}/datasets/{dataset.resource_id}" - ) - - details: CacheObject = info["details"] - if details.type is CacheType.DIRECTORY: - children = info["children"] if "children" in info else {} - dir_list = [] - file_list = [] - - for c, value in children.items(): - d: CacheObject = value["details"] - if d.type is CacheType.DIRECTORY: - dir_list.append( - { - "name": c, - "type": d.type.name, - "uri": f"{origin}/contents/{d.location}", - } - ) - elif d.type is CacheType.SYMLINK: - if d.resolve_type is CacheType.DIRECTORY: - uri = f"{origin}/contents/{d.resolve_path}" - elif d.resolve_type is CacheType.FILE: - uri = f"{origin}/inventory/{d.resolve_path}" - else: - uri = f"{origin}/inventory/{d.location}" - file_list.append( - { - "name": c, - "type": d.type.name, - "link": str(d.resolve_path), - "link_type": d.resolve_type.name, - "uri": uri, - } - ) - else: - r = { - "name": c, - "type": d.type.name, - "uri": f"{origin}/inventory/{d.location}", - } - if d.type is CacheType.FILE: - r["size"] = d.size - file_list.append(r) - - dir_list.sort(key=lambda d: d["name"]) - file_list.sort(key=lambda d: d["name"]) - - # Normalize because we want the "root" directory to be reported as - # "" rather than as Path's favored "." 
- loc = str(details.location) - name = details.name - if loc == ".": - loc = "" - name = "" - val = { - "name": name, - "type": details.type.name, - "directories": dir_list, - "files": file_list, - "uri": f"{origin}/contents/{loc}", - } - else: - access = "inventory" - link = str(details.location) - if details.type is CacheType.SYMLINK: - if details.resolve_type is CacheType.DIRECTORY: - access = "contents" - if details.resolve_type in (CacheType.FILE, CacheType.DIRECTORY): - link = str(details.resolve_path) - val = { - "name": details.name, - "type": details.type.name, - "uri": f"{origin}/{access}/{link}", - } - if details.type is CacheType.SYMLINK: - val["link"] = link - val["link_type"] = details.resolve_type.name - elif details.type is CacheType.FILE: - val["size"] = details.size - try: - return jsonify(val) + return jsonify(info) except Exception as e: - raise APIInternalError(f"JSONIFY {val}: {str(e)!r}") + raise APIInternalError(f"JSONIFY {info}: {str(e)!r}") diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index 3c8bb6bacd..2c47401cc8 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -6,6 +6,7 @@ import fcntl from logging import Logger import math +import os from pathlib import Path import shlex import shutil @@ -14,9 +15,11 @@ from typing import Any, IO, Optional, Union import humanize +from sqlalchemy import and_ from pbench.common import MetadataLog, selinux from pbench.server import JSONOBJECT, OperationCode, PathLike, PbenchServerConfig +from pbench.server.database.database import Database from pbench.server.database.models.audit import Audit, AuditStatus, AuditType from pbench.server.database.models.datasets import Dataset, DatasetNotFound, Metadata from pbench.server.utils import get_tarball_md5 @@ -799,6 +802,131 @@ def extract(tarball_path: Path, path: str) -> Inventory: tarball_path, f"Unexpected error from {tar_path}: {error_text!r}" ) + def get_contents(self, path: str, origin: str) -> JSONOBJECT: + """Return a description of a directory. + + Args: + path: relative path within the tarball + origin: root URI path for the dataset + + Returns: + A "json" dict describing the target. + """ + + with LockManager(self.lock) as lock: + artifact: Path = self.get_results(lock) / path + try: + # NOTE: os.path.abspath() removes ".." but Path.absolute(), + # doesn't, meaning the latter allows a query with path "..", + # and we don't want to allow reaching outside of the tarball. 
+ arel = Path(os.path.abspath(artifact)).relative_to(self.unpacked) + except Exception: + raise CacheExtractError(self.name, path) + if artifact.is_dir() and not artifact.is_symlink(): + dir_list = [] + file_list = [] + for f in artifact.iterdir(): + relative = f.relative_to(self.unpacked) + if f.is_symlink(): + append_to = file_list + target = f.resolve() + try: + link = target.relative_to(self.unpacked) + except Exception: + link = f.readlink() + uri = f"{origin}/inventory/{relative}" + link_type = CacheType.BROKEN + else: + if target.is_dir(): + uri = f"{origin}/contents/{link}" + link_type = CacheType.DIRECTORY + append_to = dir_list + elif target.is_file(): + uri = f"{origin}/inventory/{link}" + link_type = CacheType.FILE + else: + uri = f"{origin}/inventory/{relative}" + link_type = CacheType.OTHER + append_to.append( + { + "name": f.name, + "type": CacheType.SYMLINK.name, + "link": str(link), + "link_type": link_type.name, + "uri": uri, + } + ) + elif f.is_dir(): + dir_list.append( + { + "name": f.name, + "type": CacheType.DIRECTORY.name, + "uri": f"{origin}/contents/{relative}", + } + ) + else: + t = CacheType.FILE if f.is_file() else CacheType.OTHER + r = { + "name": f.name, + "type": t.name, + "uri": f"{origin}/inventory/{relative}", + } + if t is CacheType.FILE: + r["size"] = f.stat().st_size + file_list.append(r) + + # Normalize because we want the "root" directory to be reported as + # "" rather than as Path's favored "." + loc = str(arel) + name = artifact.name + if loc == ".": + loc = "" + name = "" + dir_list.sort(key=lambda d: d["name"]) + file_list.sort(key=lambda d: d["name"]) + val = { + "name": name, + "type": CacheType.DIRECTORY.name, + "directories": dir_list, + "files": file_list, + "uri": f"{origin}/contents/{loc}", + } + else: + access = "inventory" + link_type = CacheType.FILE + ltype = CacheType.OTHER + if artifact.is_symlink(): + link_type = CacheType.SYMLINK + try: + target = artifact.resolve(strict=True) + trel = target.relative_to(self.unpacked) + if target.is_dir(): + access = "contents" + ltype = CacheType.DIRECTORY + arel = trel + elif target.is_file(): + ltype = CacheType.FILE + arel = trel + else: + ltype = CacheType.OTHER + except Exception: + ltype = CacheType.BROKEN + trel = artifact.readlink() + elif not artifact.is_file(): + link_type = CacheType.OTHER + + val = { + "name": artifact.name, + "type": link_type.name, + "uri": f"{origin}/{access}/{arel}", + } + if link_type is CacheType.SYMLINK: + val["link"] = str(trel) + val["link_type"] = ltype.name + if link_type is CacheType.FILE or ltype is CacheType.FILE: + val["size"] = artifact.stat().st_size + return val + def get_inventory(self, path: str) -> dict[str, Any]: """Return a JSON description of a tarball member file. 
@@ -944,6 +1072,8 @@ def get_results(self, lock: LockManager) -> Path: """ if not self.unpacked: + start = time.time() + reclaim = start lock.upgrade() # If necessary, attempt to reclaim some unused cache so we have @@ -952,6 +1082,7 @@ def get_results(self, lock: LockManager) -> Path: self.controller.cache_manager.reclaim_cache( goal_bytes=self.get_unpacked_size() + RECLAIM_BYTES_PAD ) + reclaim = time.time() audit = None error = None @@ -970,6 +1101,7 @@ def get_results(self, lock: LockManager) -> Path: "Unable to audit unpack for {}: '{}'", self.name, e ) + ustart = time.time() try: tar_command = "tar -x --no-same-owner --delay-directory-restore " tar_command += f"--force-local --file='{str(self.tarball_path)}'" @@ -993,17 +1125,19 @@ def get_results(self, lock: LockManager) -> Path: attributes=attributes, ) lock.downgrade() - - # Even if we have an unpacked directory, if it wasn't done under this - # CacheManager instance we may not have a cachemap, so be prepared to - # build one. - if not self.cachemap: - self.build_map() + uend = time.time() + self.logger.info( + "{}: reclaim {:.3f}, unpack {:.3f}", + self.name, + reclaim - start, + uend - ustart, + ) self.last_ref.touch(exist_ok=True) # If we have a Dataset, and haven't already done this, compute the # unpacked size and record it in metadata so we can use it later. + ssize = time.time() if self.dataset and not Metadata.getvalue( self.dataset, Metadata.SERVER_UNPACKED ): @@ -1019,6 +1153,7 @@ def get_results(self, lock: LockManager) -> Path: Metadata.setvalue(self.dataset, Metadata.SERVER_UNPACKED, size) except Exception as e: self.logger.warning("usage check failed: {}", e) + self.logger.info("{}: size update {:.3f}", self.name, time.time() - ssize) return self.unpacked def cache_delete(self): @@ -1083,7 +1218,7 @@ class Controller: but the audit report generator will flag it. """ - def __init__(self, path: Path, cache: Path, logger: Logger): + def __init__(self, path: Path, cache: Path, logger: Logger, discover: bool = True): """Manage the representation of a controller archive on disk. In this context, the path parameter refers to a controller directory @@ -1094,6 +1229,7 @@ def __init__(self, path: Path, cache: Path, logger: Logger): path: Controller ARCHIVE directory path cache: The base of the cache tree logger: Logger object + discover: Discover all tarballs if True """ self.logger = logger @@ -1118,21 +1254,29 @@ def __init__(self, path: Path, cache: Path, logger: Logger): # Discover the tarballs that already exist. # Depends on instance properties and should remain at the end of the # constructor! 
- self._discover_tarballs() + if discover: + self._discover_tarballs() - def _add_if_tarball(self, file: Path, md5: Optional[str] = None): + def _add_if_tarball(self, file: Path, md5: Optional[str] = None) -> bool: """Check for a tar file, and create an object Args: file: path of potential tarball md5: known MD5 hash, or None to compute here + + Returns: + true if the tarball was added """ - if file.is_file() and Dataset.is_tarball(file): - hash = md5 if md5 else get_tarball_md5(file) - tarball = Tarball(file, hash, self) - self.tarballs[tarball.name] = tarball - self.datasets[tarball.resource_id] = tarball - tarball.check_unpacked() + + if not (file.is_file() and Dataset.is_tarball(file)): + return False + + hash = md5 if md5 else get_tarball_md5(file) + tarball = Tarball(file, hash, self) + self.tarballs[tarball.name] = tarball + self.datasets[tarball.resource_id] = tarball + tarball.check_unpacked() + return True def _discover_tarballs(self): """Discover the known tarballs @@ -1288,16 +1432,22 @@ def __init__(self, options: PbenchServerConfig, logger: Logger): # resource_id. self.datasets: dict[str, Tarball] = {} - def full_discovery(self) -> "CacheManager": + def full_discovery(self, search: bool = True) -> "CacheManager": """Discover the ARCHIVE and CACHE trees - NOTE: both _discover_unpacked() and _discover_results() rely on the - results of _discover_archive(), which must run first. + Full discovery is only needed for reporting, and is not required + to find, create, or delete a specific dataset. (See find_dataset.) - Full discovery is not required in order to find, create, or delete a - specific dataset. + Args: + search: search the ARCHIVE tree rather than using the Dataset table + + Returns: + CacheManager instance for chaining """ - self._discover_controllers() + if search: + self._discover_controllers() + else: + self._discover_datasets() return self def __contains__(self, dataset_id: str) -> bool: @@ -1341,7 +1491,7 @@ def _clean_empties(self, controller: str): self.delete_if_empty(archive) del self.controllers[controller] - def _add_controller(self, directory: Path) -> None: + def _add_controller(self, directory: Path, discover: bool = True) -> Controller: """Create a new Controller object Add a new controller to the set of known controllers, and append the @@ -1350,15 +1500,20 @@ def _add_controller(self, directory: Path) -> None: Args: directory: A controller directory within the ARCHIVE tree + discover: Discover tarballs if True + + Returns: + The new controller """ - controller = Controller(directory, self.options.CACHE, self.logger) + controller = Controller(directory, self.options.CACHE, self.logger, discover) controller.cache_manager = self self.controllers[controller.name] = controller self.tarballs.update(controller.tarballs) self.datasets.update(controller.datasets) + return controller def _discover_controllers(self): - """Build a representation of the ARCHIVE tree + """Discover the ARCHIVE tree exhaustively by searching it. Record all controllers (top level directories), and the tarballs that represent datasets within them. @@ -1367,15 +1522,64 @@ def _discover_controllers(self): if file.is_dir() and file.name != CacheManager.TEMPORARY: self._add_controller(file) + def _discover_datasets(self): + """Discover the ARCHIVE tree from the SQL representation of datasets + + Discover all controllers and tarballs with server.tarball-path + metadata. This will find only correct and "live" tarballs, and may + miss incorrectly labeled tarballs. 
(Unless we're trying to diagnose + ARCHIVE tree errors, this is probably what we want.) + """ + rows = ( + Database.db_session.query( + Dataset.name, + Dataset.resource_id, + Metadata.value["tarball-path"].as_string(), + ) + .execution_options(stream_results=True) + .outerjoin( + Metadata, + and_(Dataset.id == Metadata.dataset_ref, Metadata.key == "server"), + ) + .yield_per(1000) + .all() + ) + + for name, resource_id, path in rows: + if not path: + # This runs asynchronously with normal operation, and we might + # find a dataset before the "server.tarball-path" metadata is + # set. Issue a warning in case this becomes a concern, but + # otherwise ignore it and skip the dataset. + self.logger.warning( + "query unexpectedly returned name {}, resource_id {}, path {}", + name, + resource_id, + path, + ) + continue + tarball = Path(path) + controller_dir = tarball.parent + if controller_dir.name == resource_id: + controller_dir = controller_dir.parent + controller = self.controllers.get( + controller_dir.name, self._add_controller(controller_dir, False) + ) + if controller._add_if_tarball(tarball, resource_id): + self.tarballs.update(controller.tarballs) + self.datasets.update(controller.datasets) + else: + # This would be abnormal: log it and continue + self.logger.error( + "Unable to add {}: MD5 {}, path {}", name, resource_id, path + ) + def find_dataset(self, dataset_id: str) -> Tarball: - """Search the ARCHIVE tree for a matching dataset tarball. + """Return a descriptor of the identified tarball. This will build the Controller and Tarball object for that dataset if they do not already exist. - FIXME: This builds the entire Controller, which will discover all - datasets within the controller. This could be streamlined. - This allows a targeted minimal entry for mutation without discovering the entire tree. @@ -1393,16 +1597,35 @@ def find_dataset(self, dataset_id: str) -> Tarball: if dataset_id in self.datasets: return self.datasets[dataset_id] - # The dataset isn't already known; so search for it in the ARCHIVE tree - # and (if found) discover the controller containing that dataset. - for dir_entry in self.archive_root.iterdir(): - if dir_entry.is_dir() and dir_entry.name != self.TEMPORARY: - for file in dir_entry.glob(f"**/*{Dataset.TARBALL_SUFFIX}"): - md5 = get_tarball_md5(file) - if md5 == dataset_id: - self._add_controller(dir_entry) - return self.datasets[dataset_id] - raise TarballNotFound(dataset_id) + # The dataset isn't already known; so follow the tarball-path to build + # just the necessary controller and tarball objects.
+ start = time.time() + sql = start + try: + dataset = Dataset.query(resource_id=dataset_id) + sql = time.time() + tarball = Path(Metadata.getvalue(dataset, Metadata.TARBALL_PATH)) + except Exception as e: + raise TarballNotFound(dataset_id) from e + controller_dir = tarball.parent + if controller_dir.name == dataset_id: + controller_dir = controller_dir.parent + found = time.time() + controller = self.controllers.get( + controller_dir.name, self._add_controller(controller_dir, False) + ) + controller._add_if_tarball(tarball, dataset_id) + self.tarballs.update(controller.tarballs) + self.datasets.update(controller.datasets) + add = time.time() + self.logger.info( + "{}: {:.3f} to find tarball, {:.3f} SQL, {:.3f} to discover controller", + dataset.name, + found - start, + sql - start, + add - found, + ) + return self.datasets[dataset_id] # These are wrappers for controller and tarball operations which need to be # aware of higher-level constructs in the Pbench Server cache manager such as @@ -1413,16 +1636,24 @@ def find_dataset(self, dataset_id: str) -> Tarball: # Alternate constructor to create a Tarball object and move an incoming # tarball and md5 into the proper controller directory. # - # unpack - # Unpack the ARCHIVE tarball file into a new directory under the - # CACHE directory tree. + # find_entry + # Locate a path within a tarball. # - # uncache - # Remove the unpacked directory tree when no longer needed. + # get_contents + # Return metadata about a path within a tarball. + # + # get_inventory + # Return a managed byte stream for a file within a tarball. + # + # get_inventory_bytes + # Return the contents of a file within a tarball as a string. # # delete # Remove the tarball and MD5 file from ARCHIVE after uncaching the # unpacked directory tree. + # + # reclaim_cache + # Remove cached tarball trees to free disk space. def create(self, tarfile_path: Path) -> Tarball: """Bring a new tarball under cache manager management.
@@ -1501,6 +1732,20 @@ def find_entry(self, dataset_id: str, path: Path) -> dict[str, Any]: tarball = self.find_dataset(dataset_id) return tarball.find_entry(path) + def get_contents(self, dataset_id: str, path: str, origin: str) -> dict[str, Any]: + """Get information about a dataset file or directory + + Args: + dataset_id: Dataset resource ID + path: path of requested content + origin: base URI for links + + Returns: + contents metadata + """ + tarball = self.find_dataset(dataset_id) + return tarball.get_contents(path, origin) + def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]: """Return filestream data for a file within a dataset tarball diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index 6f29a8359f..146f29debf 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -31,7 +31,8 @@ TarballUnpackError, ) from pbench.server.database.models.audit import Audit, AuditStatus, AuditType -from pbench.server.database.models.datasets import Dataset, DatasetBadName +from pbench.server.database.models.datasets import Dataset, DatasetBadName, Metadata +from pbench.server.database.models.users import User from pbench.test.unit.server.conftest import make_tarball @@ -581,6 +582,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: class MockTarball: def __init__(self, path: Path, resource_id: str, controller: Controller): self.name = Dataset.stem(path) + self.logger = controller.logger self.resource_id = "ABC" self.dataset = None self.tarball_path = path @@ -1552,6 +1554,170 @@ def read(self, size: int = -1) -> bytes: assert stream.lock is None assert my_calls == exp_calls + def test_get_contents(self, server_config, make_logger, tarball, db_session): + """Test the TOC analysis""" + + source_tarball, source_md5, md5 = tarball + cm = CacheManager(server_config, make_logger) + cm.create(source_tarball) + root = f"https://fake/api/v1/datasets/{md5}" + + # Unpack the tarball and show the canonical `tarball` fixture contents + assert cm.get_contents(md5, "", root) == { + "name": "", + "type": "DIRECTORY", + "uri": f"{root}/contents/", + "directories": [], + "files": [ + { + "name": "metadata.log", + "size": 28, + "type": "FILE", + "uri": f"{root}/inventory/metadata.log", + } + ], + } + + assert cm.get_contents(md5, "metadata.log", root) == { + "name": "metadata.log", + "type": "FILE", + "uri": f"{root}/inventory/metadata.log", + "size": 28, + } + + with pytest.raises(CacheExtractError): + cm.get_contents(md5, "../..", root) + + # Now that we have an unpacked directory, manually "enhance" it to test + # more paths.
+ + base: Path = cm.find_dataset(md5).unpacked + subdir = base / "subdir" + subdir.mkdir() + ldir = base / "dir_link" + ldir.symlink_to(subdir) + lfile = base / "file_link" + lfile.symlink_to(Path(base / "metadata.log")) + badlink = base / "bad_link" + badlink.symlink_to("/etc/passwd") + illegallink = base / "illegal_link" + illegallink.symlink_to(base / "..") + linklink = base / "link_link" + linklink.symlink_to(lfile) + fifo = base / "fifo" + os.mkfifo(fifo) + fifolink = base / "fifo_link" + fifolink.symlink_to(fifo) + assert cm.get_contents(md5, "", root) == { + "name": "", + "type": "DIRECTORY", + "directories": [ + { + "name": "dir_link", + "type": "SYMLINK", + "link": "subdir", + "link_type": "DIRECTORY", + "uri": f"{root}/contents/subdir", + }, + { + "name": "subdir", + "type": "DIRECTORY", + "uri": f"{root}/contents/subdir", + }, + ], + "files": [ + { + "name": "bad_link", + "type": "SYMLINK", + "link": "/etc/passwd", + "link_type": "BROKEN", + "uri": f"{root}/inventory/bad_link", + }, + { + "name": "fifo", + "type": "OTHER", + "uri": f"{root}/inventory/fifo", + }, + { + "name": "fifo_link", + "type": "SYMLINK", + "link": "fifo", + "link_type": "OTHER", + "uri": f"{root}/inventory/fifo_link", + }, + { + "name": "file_link", + "type": "SYMLINK", + "link": "metadata.log", + "link_type": "FILE", + "uri": f"{root}/inventory/metadata.log", + }, + { + "name": "illegal_link", + "type": "SYMLINK", + "link": str(base / ".."), + "link_type": "BROKEN", + "uri": f"{root}/inventory/illegal_link", + }, + { + "name": "link_link", + "type": "SYMLINK", + "link": "metadata.log", + "link_type": "FILE", + "uri": f"{root}/inventory/metadata.log", + }, + { + "name": "metadata.log", + "type": "FILE", + "uri": f"{root}/inventory/metadata.log", + "size": 28, + }, + ], + "uri": f"{root}/contents/", + } + + assert cm.get_contents(md5, "subdir", root) == { + "name": "subdir", + "type": "DIRECTORY", + "directories": [], + "files": [], + "uri": f"{root}/contents/subdir", + } + assert cm.get_contents(md5, "dir_link", root) == { + "name": "dir_link", + "type": "SYMLINK", + "uri": f"{root}/contents/subdir", + "link": "subdir", + "link_type": "DIRECTORY", + } + assert cm.get_contents(md5, "file_link", root) == { + "name": "file_link", + "type": "SYMLINK", + "uri": f"{root}/inventory/metadata.log", + "link": "metadata.log", + "link_type": "FILE", + "size": 28, + } + assert cm.get_contents(md5, "bad_link", root) == { + "name": "bad_link", + "type": "SYMLINK", + "uri": f"{root}/inventory/bad_link", + "link": "/etc/passwd", + "link_type": "BROKEN", + } + assert cm.get_contents(md5, "fifo_link", root) == { + "name": "fifo_link", + "type": "SYMLINK", + "uri": f"{root}/inventory/fifo_link", + "link": "fifo", + "link_type": "OTHER", + } + assert cm.get_contents(md5, "fifo", root) == { + "name": "fifo", + "type": "OTHER", + "uri": f"{root}/inventory/fifo", + } + def test_find( self, selinux_enabled, @@ -1566,6 +1732,12 @@ def test_find( through the various supported methods. 
""" + def mock_query(**kwargs) -> Dataset: + id = kwargs.get("resource_id") + if id == md5: + return Dataset(name=dataset_name, resource_id=md5) + raise TarballNotFound(id) + monkeypatch.setattr(Tarball, "_get_metadata", fake_get_metadata) source_tarball, source_md5, md5 = tarball dataset_name = Dataset.stem(source_tarball) @@ -1580,6 +1752,8 @@ def test_find( assert tarball assert tarball.name == dataset_name assert tarball.resource_id == md5 + monkeypatch.setattr(Dataset, "query", mock_query) + monkeypatch.setattr(Metadata, "getvalue", lambda d, k: tarball.tarball_path) # Test __getitem__ assert tarball == cm[md5] @@ -1607,7 +1781,7 @@ def test_find( assert str(exc.value) == "The dataset tarball named 'foobar' is not found" assert exc.value.tarball == "foobar" - # Unpack the dataset, creating INCOMING and RESULTS links + # Unpack the dataset tarball.get_results(FakeLockRef(tarball.lock)) assert tarball.cache == controller.cache / md5 assert tarball.unpacked == controller.cache / md5 / tarball.name @@ -1807,6 +1981,44 @@ def test_compatibility( assert not tar2.exists() assert not tar1.parent.exists() + def test_discover_datasets( + self, + db_session, + selinux_enabled, + create_drb_user, + server_config, + make_logger, + tarball, + monkeypatch, + ): + """Test compatibility with both new "isolated" and old tarballs + + Make sure we can discover and manage (unpack and delete) both new + tarballs with an MD5 isolation directory and pre-existing tarballs + directly in the controller directory. + """ + + monkeypatch.setattr(Tarball, "_get_metadata", fake_get_metadata) + source_tarball, source_md5, md5 = tarball + name = Dataset.stem(source_tarball) + cm = CacheManager(server_config, make_logger) + cm.create(source_tarball) + assert cm[md5].name == name + path = str(cm[md5].tarball_path) + + # Rediscover the cache, which should find the tarball + cm1 = CacheManager(server_config, make_logger).full_discovery() + assert cm1[md5].name == name + + # Rediscover again, using dataset metadata. 
But the cache manager + # doesn't create the Dataset or Metadata, so we do it here) + drb = User.query(username="drb") + ds = Dataset(name=name, resource_id=md5, owner=drb, access="public") + ds.add() + Metadata.setvalue(ds, Metadata.TARBALL_PATH, path) + cm2 = CacheManager(server_config, make_logger).full_discovery(search=False) + assert cm2[md5].name == name + def test_lockref(self, monkeypatch): """Test behavior of the LockRef class""" diff --git a/lib/pbench/test/unit/server/test_datasets_compare.py b/lib/pbench/test/unit/server/test_datasets_compare.py index ed5611cdbd..467946de9e 100644 --- a/lib/pbench/test/unit/server/test_datasets_compare.py +++ b/lib/pbench/test/unit/server/test_datasets_compare.py @@ -10,11 +10,13 @@ from pbench.server.database.models.datasets import Dataset, DatasetNotFound, Metadata from pbench.server.database.models.users import User +real_getvalue = Metadata.getvalue -def mock_get_value(dataset: Dataset, key: str, user: Optional[User] = None) -> str: - if dataset.name == "uperf_3" or dataset.name == "uperf_4": - return "hammerDB" - return "uperf" + +def mock_get_value(dataset: Dataset, key: str, user: Optional[User] = None) -> JSON: + if key == Metadata.SERVER_BENCHMARK: + return "hammerDB" if dataset.name in ("uperf_3", "uperf_4") else "uperf" + return real_getvalue(dataset, key, user) class TestCompareDatasets: @@ -55,8 +57,11 @@ def query_api( return query_api def test_no_postprocessed_data(self, query_get_as, monkeypatch): - monkeypatch.setattr(Metadata, "getvalue", mock_get_value) - + """Test with an unknown benchmark""" + fio_2 = Dataset.query(name="fio_2") + Metadata.setvalue( + fio_2, Metadata.SERVER_BENCHMARK, Metadata.SERVER_BENCHMARK_UNKNOWN + ) query_get_as(["fio_2"], "drb", HTTPStatus.BAD_REQUEST) def test_with_incorrect_data(self, query_get_as, monkeypatch): diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py index dc05989ca6..1aaa296f00 100644 --- a/lib/pbench/test/unit/server/test_datasets_contents.py +++ b/lib/pbench/test/unit/server/test_datasets_contents.py @@ -5,11 +5,11 @@ import pytest import requests -from pbench.server.cache_manager import BadDirpath, CacheManager, CacheObject, CacheType +from pbench.server.cache_manager import BadDirpath, CacheManager from pbench.server.database.models.datasets import Dataset, DatasetNotFound -class TestDatasetsAccess: +class TestDatasetsContents: @pytest.fixture() def query_get_as(self, client, server_config, more_datasets, pbench_drb_token): """ @@ -63,220 +63,57 @@ def test_unauthorized_access(self, query_get_as): "message": "User drb is not authorized to READ a resource owned by test with private access" } + def test_contents_error(self, monkeypatch, query_get_as): + """This fails with an internal error from get_contents""" + + def mock_contents(_s, _d, _p, _o): + raise Exception("Nobody expected me") + + monkeypatch.setattr(CacheManager, "get_contents", mock_contents) + query_get_as("drb", "metadata.log", HTTPStatus.INTERNAL_SERVER_ERROR) + + def test_jsonify_error(self, monkeypatch, query_get_as): + """This fails with an internal error from jsonifying bad data""" + + monkeypatch.setattr( + CacheManager, "get_contents", lambda _s, _d, _p, _o: {1: 0, Path("."): "a"} + ) + query_get_as("drb", "metadata.log", HTTPStatus.INTERNAL_SERVER_ERROR) + @pytest.mark.parametrize("key", ("", ".", "subdir1")) def test_path_is_directory(self, query_get_as, monkeypatch, key): - """Mock a directory cache node to check the returned data""" + 
"""Mock a directory cache node to check that data is passed through""" name = "" if key == "." else key - - def mock_find_entry(_s, _d: str, path: Optional[Path]): - file = path if path else Path(".") - return { - "children": {}, - "details": CacheObject( - name, file, None, None, None, CacheType.DIRECTORY - ), - } - - monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) - response = query_get_as("fio_2", key, HTTPStatus.OK) - assert response.json == { + contents = { "directories": [], "files": [], - "name": name, - "type": "DIRECTORY", "uri": f"https://localhost/api/v1/datasets/random_md5_string4/contents/{name}", } + monkeypatch.setattr(CacheManager, "get_contents", lambda s, d, p, o: contents) + response = query_get_as("fio_2", key, HTTPStatus.OK) + assert response.json == contents + def test_not_a_file(self, query_get_as, monkeypatch): - """When 'find_entry' fails with an exception, we report the text""" + """When 'get_contents' fails with an exception, we report the text""" - def mock_find_entry(_s, _d: str, path: Optional[Path]): + def mock_get_contents(_s, _d: str, _p: Optional[Path], _o: str): raise BadDirpath("Nobody home") - monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) + monkeypatch.setattr(CacheManager, "get_contents", mock_get_contents) response = query_get_as("fio_2", "subdir1/f1_sym", HTTPStatus.NOT_FOUND) assert response.json == {"message": "Nobody home"} def test_file_info(self, query_get_as, monkeypatch): - """Mock a file cache node to check the returned data""" + """Mock a file cache node to check check that data is passed through""" name = "f1.json" - - def mock_find_entry(_s, _d: str, path: Optional[Path]): - return { - "details": CacheObject(path.name, path, None, None, 16, CacheType.FILE) - } - - monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) - response = query_get_as("fio_2", name, HTTPStatus.OK) - assert response.status_code == HTTPStatus.OK - assert response.json == { + file_data = { "name": name, "size": 16, "type": "FILE", "uri": f"https://localhost/api/v1/datasets/random_md5_string4/inventory/{name}", } - - def test_link_info(self, query_get_as, monkeypatch): - """Mock a symlink cache node to check the returned data""" - name = "test/slink" - - def mock_find_entry(_s, _d: str, path: Optional[Path]): - return { - "details": CacheObject( - path.name, - path, - Path("test1"), - CacheType.DIRECTORY, - None, - CacheType.SYMLINK, - ) - } - - monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) + monkeypatch.setattr(CacheManager, "get_contents", lambda s, d, p, o: file_data) response = query_get_as("fio_2", name, HTTPStatus.OK) - assert response.json == { - "name": "slink", - "type": "SYMLINK", - "link": "test1", - "link_type": "DIRECTORY", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/test1", - } - - def test_dir_info(self, query_get_as, monkeypatch): - """Confirm the returned data for a directory with various children""" - - def mock_find_entry(_s, _d: str, path: Optional[Path]): - base = path if path else Path("") - return { - "children": { - "default": { - "details": CacheObject( - "default", - base / "default", - None, - None, - None, - CacheType.DIRECTORY, - ) - }, - "file.txt": { - "details": CacheObject( - "file.txt", - base / "file.txt", - None, - None, - 42, - CacheType.FILE, - ) - }, - "badlink": { - "details": CacheObject( - "badlink", - base / "badlink", - Path("../../../.."), - CacheType.BROKEN, - None, - CacheType.SYMLINK, - ), - }, - "mountlink": { - "details": 
CacheObject( - "mountlink", - base / "mountlink", - Path("mount"), - CacheType.OTHER, - None, - CacheType.SYMLINK, - ), - }, - "symfile": { - "details": CacheObject( - "symfile", - base / "symfile", - Path("metadata.log"), - CacheType.FILE, - None, - CacheType.SYMLINK, - ) - }, - "symdir": { - "details": CacheObject( - "symdir", - base / "symdir", - Path("realdir"), - CacheType.DIRECTORY, - None, - CacheType.SYMLINK, - ) - }, - "mount": { - "details": CacheObject( - "mount", - base / "mount", - None, - None, - 20, - CacheType.OTHER, - ) - }, - }, - "details": CacheObject( - base.name, base, None, None, None, CacheType.DIRECTORY - ), - } - - monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) - response = query_get_as("fio_2", "sample1", HTTPStatus.OK) - assert response.json == { - "directories": [ - { - "name": "default", - "type": "DIRECTORY", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/sample1/default", - } - ], - "files": [ - { - "name": "badlink", - "type": "SYMLINK", - "link": "../../../..", - "link_type": "BROKEN", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/badlink", - }, - { - "name": "file.txt", - "size": 42, - "type": "FILE", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/file.txt", - }, - { - "name": "mount", - "type": "OTHER", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/mount", - }, - { - "name": "mountlink", - "type": "SYMLINK", - "link": "mount", - "link_type": "OTHER", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/mountlink", - }, - { - "name": "symdir", - "type": "SYMLINK", - "link": "realdir", - "link_type": "DIRECTORY", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/realdir", - }, - { - "name": "symfile", - "type": "SYMLINK", - "link": "metadata.log", - "link_type": "FILE", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/metadata.log", - }, - ], - "name": "sample1", - "type": "DIRECTORY", - "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/sample1", - } + assert response.status_code == HTTPStatus.OK + assert response.json == file_data
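For reference, a minimal sketch of the ".." containment check that Tarball.get_contents (above) applies before describing a target: os.path.abspath() collapses ".." lexically, so a request path that would escape the unpacked tree fails relative_to() and is rejected. The cache paths below are hypothetical.

import os
from pathlib import Path

unpacked = Path("/srv/pbench/cache/abc123/run")  # hypothetical unpacked root
for target in ("subdir/result.txt", "../../etc/passwd"):
    candidate = unpacked / target
    try:
        # Same pattern as get_contents: normalize, then require containment.
        rel = Path(os.path.abspath(candidate)).relative_to(unpacked)
        print(f"{target!r} -> {rel}")
    except ValueError:
        # get_contents raises CacheExtractError in this situation.
        print(f"{target!r} rejected: outside the unpacked tree")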
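Similarly, a small illustrative sketch of the controller-directory derivation shared by _discover_datasets and find_dataset above: a new-style tarball sits in an MD5-named isolation directory under its controller, while an old-style tarball sits directly in the controller directory. The sample paths and resource ID are hypothetical.

from pathlib import Path

def controller_dir_for(tarball_path: str, resource_id: str) -> Path:
    """Return the controller directory that owns a tarball path."""
    parent = Path(tarball_path).parent
    # An MD5-named parent is the isolation directory; its parent is the
    # controller. Otherwise the parent is the controller itself.
    return parent.parent if parent.name == resource_id else parent

assert controller_dir_for(
    "/srv/pbench/archive/ctrl/abc123/run.tar.xz", "abc123"  # new layout
).name == "ctrl"
assert controller_dir_for(
    "/srv/pbench/archive/ctrl/run.tar.xz", "abc123"  # old layout
).name == "ctrl"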
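Finally, an illustrative client-side sketch of how the reshaped contents responses might be consumed, relying only on the fields exercised in test_get_contents (name, uri, size, and the directories/files lists); the host, dataset ID, and walk() helper are hypothetical.

import requests

def walk(session: requests.Session, contents_uri: str, depth: int = 0) -> None:
    """Print a simple tree by following "contents" URIs recursively."""
    node = session.get(contents_uri).json()
    for d in node.get("directories", []):
        print("  " * depth + d["name"] + "/")
        # DIRECTORY entries (and symlinks resolving to directories) carry a
        # ".../contents/..." URI that can be followed recursively.
        walk(session, d["uri"], depth + 1)
    for f in node.get("files", []):
        size = f" ({f['size']} bytes)" if "size" in f else ""
        print("  " * depth + f["name"] + size)

# Example (hypothetical host and resource ID):
# walk(requests.Session(),
#      "https://pbench.example.com/api/v1/datasets/<md5>/contents/")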