diff --git a/lib/pbench/cli/server/__init__.py b/lib/pbench/cli/server/__init__.py index 108f8307ac..b66557a1ce 100644 --- a/lib/pbench/cli/server/__init__.py +++ b/lib/pbench/cli/server/__init__.py @@ -1,7 +1,7 @@ import datetime from threading import Thread import time -from typing import Any, Optional +from typing import Any, Optional, Union import click from click import Context, Parameter, ParamType @@ -11,6 +11,27 @@ from pbench.server.database import init_db +class DateParser(ParamType): + """The DateParser type converts date strings into `datetime` objects. + + This is a variant of click's built-in DateTime parser, but uses the + more flexible dateutil.parser + """ + + name = "dateparser" + + def convert( + self, value: Any, param: Optional[Parameter], ctx: Optional[Context] + ) -> Any: + if isinstance(value, datetime.datetime): + return value + + try: + return parser.parse(value) + except Exception as e: + self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}") + + class Detail: """Encapsulate generation of additional diagnostics""" @@ -63,13 +84,16 @@ def warning(self, message: str): class Verify: """Encapsulate -v status messages.""" - def __init__(self, verify: bool): + def __init__(self, verify: Union[bool, int]): """Initialize the object. Args: verify: True to write status messages. """ - self.verify = verify + if isinstance(verify, int): + self.verify = verify + else: + self.verify = 1 if verify else 0 def __bool__(self) -> bool: """Report whether verification is enabled. @@ -77,15 +101,15 @@ def __bool__(self) -> bool: Returns: True if verification is enabled. """ - return self.verify + return bool(self.verify) - def status(self, message: str): + def status(self, message: str, level: int = 1): """Write a message if verification is enabled. Args: message: status string """ - if self.verify: + if self.verify >= level: ts = datetime.datetime.now().astimezone() click.secho(f"({ts:%H:%M:%S}) {message}", fg="green", err=True) @@ -138,27 +162,6 @@ def watcher(self): ) -class DateParser(ParamType): - """The DateParser type converts date strings into `datetime` objects. - - This is a variant of click's built-in DateTime parser, but uses the - more flexible dateutil.parser - """ - - name = "dateparser" - - def convert( - self, value: Any, param: Optional[Parameter], ctx: Optional[Context] - ) -> Any: - if isinstance(value, datetime.datetime): - return value - - try: - return parser.parse(value) - except Exception as e: - self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}") - - def config_setup(context: object) -> PbenchServerConfig: config = PbenchServerConfig.create(context.config) # We're going to need the DB to track dataset state, so setup DB access. diff --git a/lib/pbench/cli/server/audit.py b/lib/pbench/cli/server/audit.py index 980f35b6d5..e385d2f177 100644 --- a/lib/pbench/cli/server/audit.py +++ b/lib/pbench/cli/server/audit.py @@ -5,7 +5,7 @@ import click from pbench.cli import pass_cli_context -from pbench.cli.server import config_setup, Verify +from pbench.cli.server import config_setup, DateParser, Verify from pbench.cli.server.options import common_options from pbench.server import BadConfig, OperationCode from pbench.server.database.database import Database @@ -110,6 +110,7 @@ def auditor(kwargs) -> Iterator[str]: @click.command(name="pbench-audit") @pass_cli_context +@click.option("--id", type=int, help="Select by audit event ID") @click.option( "--ids", default=False, @@ -130,6 +131,7 @@ def auditor(kwargs) -> Iterator[str]: @click.option("--object-id", type=str, help="Select by object ID") @click.option("--object-name", type=str, help="Select by object name") @click.option("--page", default=False, is_flag=True, help="Paginate the output") +@click.option("--root-id", type=int, help="Select by audit event root ID") @click.option("--user-id", type=str, help="Select by user ID") @click.option("--user-name", type=str, help="Select by username") @click.option( @@ -142,12 +144,12 @@ def auditor(kwargs) -> Iterator[str]: ) @click.option( "--since", - type=click.DateTime(), + type=DateParser(), help="Select entries on or after specified date/time", ) @click.option( "--until", - type=click.DateTime(), + type=DateParser(), help="Select entries on or before specified date/time", ) @click.option( diff --git a/lib/pbench/cli/server/repair.py b/lib/pbench/cli/server/repair.py new file mode 100644 index 0000000000..42287b3d03 --- /dev/null +++ b/lib/pbench/cli/server/repair.py @@ -0,0 +1,340 @@ +from collections import defaultdict +import copy +import datetime +from operator import and_ +from pathlib import Path +from typing import Any, Optional + +import click +from sqlalchemy import cast, column, or_, String, Subquery +from sqlalchemy.orm import aliased +from sqlalchemy.sql.expression import func + +from pbench.cli import pass_cli_context +from pbench.cli.server import config_setup, Detail, Verify, Watch +from pbench.cli.server.options import common_options +from pbench.common.logger import get_pbench_logger +from pbench.server import BadConfig, cache_manager +from pbench.server.database.database import Database +from pbench.server.database.models.audit import Audit +from pbench.server.database.models.datasets import Dataset, Metadata +from pbench.server.database.models.server_settings import get_retention_days +from pbench.server.utils import UtcTimeHelper + +detailer: Optional[Detail] = None +watcher: Optional[Watch] = None +verifier: Optional[Verify] = None + + +LIMIT = cache_manager.MAX_ERROR + + +def repair_audit(kwargs): + """Repair certain audit record problems. + + 1. truncate extremely long audit event "attributes" + + Args: + kwargs: the command options. + """ + + rows = ( + Database.db_session.query(Audit) + .filter(func.length(cast(Audit.attributes, String)) >= LIMIT) + .order_by(Audit.timestamp) + .execution_options(stream_results=True) + .yield_per(2000) + ) + count = 0 + attributes_errors = 0 + updated = 0 + commit_error = None + for audit in rows: + count += 1 + name = f"{audit.id}:{audit.status.name} {audit.name} {audit.object_name}" + # Deep copy works around a limitation in SQLAlchemy JSON proxy handling + a: dict[str, Any] = copy.deepcopy(audit.attributes) + if type(a) is not dict: + detailer.error(f"{name} attributes is type {type(a).__name__}") + attributes_errors += 1 + continue + didit = False + for key, value in a.items(): + if type(value) is str and len(value) > LIMIT: + p = f"[TRUNC({len(value)})]" + a[key] = (p + value)[:LIMIT] + detailer.message(f"{name} [{key}] truncated ({len(value)}) to {LIMIT}") + didit = True + if didit: + audit.attributes = a + updated += 1 + if updated: + try: + Database.db_session.commit() + except Exception as e: + commit_error = str(e) + click.echo(f"{count} audit records triggered field truncation") + click.echo( + f"{updated} records were truncated, {count-updated} had no eligible fields" + ) + if attributes_errors: + click.echo(f"{attributes_errors} had format errors in attributes") + if commit_error: + click.echo(f"SQL error, changes were not made: {commit_error!r}") + + +def find_tarball(resource_id: str, kwargs) -> Optional[Path]: + """Find a tarball in the ARCHIVE tree + + This is a "brute force" helper to repair a missing server.tarball-path + metadata. We can't use the cache manager find_dataset(), which relies + on server.tarball-path; so instead we do a search for the MD5 value. + + Args: + resource_id: the dataset resource ID (MD5) + kwargs: CLI options + + Returns: + tarball path if found, or None + """ + + def get_md5(file: Path) -> Optional[str]: + """Locate and read a tarball's associated MD5 hash""" + md5 = file.with_suffix(".xz.md5") + if md5.is_file(): + return md5.read_text().split(" ", maxsplit=1)[0] + else: + detailer.error(f"Missing MD5 {md5}") + return None + + # We use the cache manager as a standard way to get the ARCHIVE root + tree = cache_manager.CacheManager(kwargs["_config"], kwargs["_logger"]) + for controller in tree.archive_root.iterdir(): + watcher.update(f"searching {controller} for {resource_id}") + if controller.is_dir(): + isolator = controller / resource_id + if isolator.is_dir(): + tars = list(isolator.glob("*.tar.xz")) + if len(tars) > 1: + detailer.error( + f"Isolator directory {isolator} contains multiple tarballs: {[str(t) for t in tars]}" + ) + for tar in tars: + if get_md5(tar) == resource_id: + verifier.status(f"Found {tar} for ID {resource_id}", 2) + return tar + detailer.error( + f"Isolator directory {isolator} doesn't contain a tarball for {resource_id}" + ) + return None + else: + for tar in controller.glob("**/*.tar.xz"): + if get_md5(tar) == resource_id: + verifier.status(f"Found {tar} for ID {resource_id}", 2) + return tar + return None + + +def repair_metadata(kwargs): + """Repair certain critical metadata errors + + 1. Missing server.tarball-path + 2. Missing dataset.metalog + 3. Missing server.benchmark + + Args: + kwargs: the command options + """ + + # In order to filter on "derived" values like the JSON path extraction, + # we need to use a nested SELECT -- PostgreSQL doesn't allow using an + # "as" label in the WHERE clause, but labeling the nested SELECT columns + # gets around that limitation. We also do two OUTER JOINs in order to + # filter only for datasets where one of the three things we know how to + # repair is missing. + mlog = aliased(Metadata, name="metalog") + meta: Subquery = ( + Database.db_session.query( + Metadata.dataset_ref, + Metadata.value["tarball-path"].label("path"), + Metadata.value["benchmark"].label("benchmark"), + Metadata.value["deletion"].label("expiration"), + ) + .where(Metadata.key == "server") + .subquery() + ) + query = ( + Database.db_session.query( + Dataset, meta.c.path, meta.c.benchmark, meta.c.expiration, mlog + ) + .outerjoin(meta, Dataset.id == meta.c.dataset_ref) + .outerjoin( + mlog, and_(Dataset.id == mlog.dataset_ref, mlog.key == Metadata.METALOG) + ) + .filter( + or_( + meta.c.path.is_(None), + meta.c.benchmark.is_(None), + meta.c.expiration.is_(None), + column("metalog").is_(None), + ) + ) + .execution_options(stream_results=True) + ) + + path_repairs = 0 + expiration_repairs = 0 + benchmark_repairs = 0 + metalog_repairs = 0 + path_repairs_failed = 0 + expiration_repairs_failed = 0 + metalog_repairs_failed = 0 + rows = query.yield_per(2000) + + # We have to finish reading before we can write metadata, or we'll confuse + # SQLAlchemy's cursor logic. So build a defer queue for metadata values to + # set. (Another alternative would be to list-ify the query: it's unclear + # which would scale better.) + defer = [] + for dataset, path, benchmark, expiration, metadata in rows: + fix = {"dataset": dataset, "metadata": defaultdict()} + if not path: + path_repairs += 1 + path = find_tarball(dataset.resource_id, kwargs) + if path: + detailer.message( + f"{dataset.name} has no {Metadata.TARBALL_PATH}: setting {path}" + ) + fix["metadata"][Metadata.TARBALL_PATH] = str(path) + else: + path_repairs_failed += 1 + detailer.error(f"{dataset.name} doesn't seem to have a tarball") + if not metadata or not metadata.value: + metalog_repairs += 1 + which = "metadata.log" + try: + metalog = cache_manager.Tarball._get_metadata(path) + except Exception: + which = "default" + metalog = { + "pbench": { + "name": dataset.name, + "script": Metadata.SERVER_BENCHMARK_UNKNOWN, + } + } + fix["metadata"][Metadata.SERVER_ARCHIVE] = True + detailer.message( + f"{dataset.name} has no {Metadata.METALOG}: setting from {which}" + ) + fix["metalog"] = metalog + else: + metalog = metadata.value + + if not expiration: + expiration_repairs += 1 + try: + retention_days = get_retention_days(kwargs.get("_config")) + retention = datetime.timedelta(days=retention_days) + deletion = UtcTimeHelper(dataset.uploaded + retention).to_iso_string() + fix["metadata"][Metadata.SERVER_DELETION] = deletion + detailer.message( + f"{dataset.name} {Metadata.SERVER_DELETION} " + f"set ({retention_days} days) to {deletion}" + ) + except Exception as e: + detailer.error( + f"unable to calculate {dataset.name} expiration: {str(e)!r}" + ) + expiration_repairs_failed += 1 + + if not benchmark: + benchmark_repairs += 1 + script = metalog.get("pbench", {}).get( + "script", Metadata.SERVER_BENCHMARK_UNKNOWN + ) + detailer.message( + f"{dataset.name} has no {Metadata.SERVER_BENCHMARK}: setting {script!r}" + ) + fix["metadata"][Metadata.SERVER_BENCHMARK] = script + defer.append(fix) + for each in defer: + dataset = each["dataset"] + metalog = each.get("metalog") + if metalog: + try: + Metadata.create(dataset=dataset, key=Metadata.METALOG, value=metalog) + except Exception as e: + metalog_repairs_failed += 1 + detailer.error(f"Unable to create {dataset.name} metalog: {str(e)!r}") + for key, value in each.get("metadata", {}).items(): + Metadata.setvalue(dataset, key, value) + + click.echo( + f"{path_repairs} {Metadata.TARBALL_PATH} repairs, " + f"{path_repairs_failed} failures" + ) + click.echo( + f"{expiration_repairs} {Metadata.SERVER_DELETION} repairs, " + f"{expiration_repairs_failed} failures" + ) + click.echo( + f"{metalog_repairs} dataset.metalog repairs, " + f"{metalog_repairs_failed} failures" + ) + click.echo(f"{benchmark_repairs} {Metadata.SERVER_BENCHMARK} repairs") + + +@click.command(name="pbench-repair") +@pass_cli_context +@click.option( + "--detail", + "-d", + default=False, + is_flag=True, + help="Provide extra diagnostic information", +) +@click.option( + "--errors", + "-e", + default=False, + is_flag=True, + help="Show individual file errors", +) +@click.option( + "--progress", "-p", type=float, default=0.0, help="Show periodic progress messages" +) +@click.option( + "--verify", "-v", default=False, count=True, help="Display intermediate messages" +) +@common_options +def repair(context: object, **kwargs): + """Repair consistency problems in the Pbench Server data + \f + + Args: + context: click context + kwargs: click options + """ + global detailer, verifier, watcher + detailer = Detail(kwargs.get("detail"), kwargs.get("errors")) + verifier = Verify(kwargs.get("verify")) + watcher = Watch(kwargs.get("progress")) + + try: + config = config_setup(context) + kwargs["_logger"] = get_pbench_logger("pbench-repair", config) + kwargs["_config"] = config + verifier.status("Repairing audit") + watcher.update("repairing audit") + repair_audit(kwargs) + verifier.status("Repairing metadata") + watcher.update("repairing metadata") + repair_metadata(kwargs) + rv = 0 + except Exception as exc: + if verifier.verify: + raise + click.secho(exc, err=True, bg="red") + rv = 2 if isinstance(exc, BadConfig) else 1 + + click.get_current_context().exit(rv) diff --git a/lib/pbench/server/api/resources/__init__.py b/lib/pbench/server/api/resources/__init__.py index ccb59f7307..d74eb095d4 100644 --- a/lib/pbench/server/api/resources/__init__.py +++ b/lib/pbench/server/api/resources/__init__.py @@ -1811,18 +1811,19 @@ def _get_dataset_metadata(dataset: Dataset, requested_items: list[str]) -> JSON: @staticmethod def _set_dataset_metadata( - dataset: Dataset, metadata: dict[str, JSONVALUE] + dataset: Dataset, metadata: dict[str, JSONVALUE], throw: bool = False ) -> dict[str, str]: """Set metadata on a specific Dataset This supports strict Metadata key/value items associated with the Dataset as well as selected columns from the Dataset model. - Errors are collected and returned. + Errors are collected and returned if 'throw' is False (default) Args: dataset: Dataset object metadata: dict of key/value pairs + throw: propagate exceptions instead of returning failures Returns: A dict associating an error string with each failing metadata key. @@ -1837,6 +1838,8 @@ def _set_dataset_metadata( try: Metadata.setvalue(key=k, value=v, dataset=dataset, user=user) except MetadataError as e: + if throw: + raise fail[k] = str(e) return fail diff --git a/lib/pbench/server/api/resources/endpoint_configure.py b/lib/pbench/server/api/resources/endpoint_configure.py index 7c65e9558c..2a53be28c5 100644 --- a/lib/pbench/server/api/resources/endpoint_configure.py +++ b/lib/pbench/server/api/resources/endpoint_configure.py @@ -44,7 +44,9 @@ def __init__(self, config: PbenchServerConfig): proxying was set up for the original endpoints query: e.g., the Javascript `window.origin` from which the Pbench dashboard was loaded. """ - super().__init__(config, ApiSchema(ApiMethod.GET, OperationCode.READ)) + super().__init__( + config, ApiSchema(ApiMethod.GET, OperationCode.READ), always_enabled=True + ) self.server_config = config def _get(self, args: ApiParams, req: Request, context: ApiContext) -> Response: diff --git a/lib/pbench/server/api/resources/intake_base.py b/lib/pbench/server/api/resources/intake_base.py index ed428edea8..75764d9765 100644 --- a/lib/pbench/server/api/resources/intake_base.py +++ b/lib/pbench/server/api/resources/intake_base.py @@ -48,6 +48,7 @@ OperationState, ) from pbench.server.database.models.server_settings import ( + get_retention_days, OPTION_SERVER_INDEXING, ServerSetting, ) @@ -556,17 +557,11 @@ def _intake( f"Unable to create metalog for Tarball {dataset.name!r}: {exc}" ) from exc - try: - retention_days = self.config.default_retention_period - except Exception as e: - raise APIInternalError( - f"Unable to get integer retention days: {e!s}" - ) from e - # Calculate a default deletion time for the dataset, based on the # time it was uploaded rather than the time it was originally # created which might much earlier. try: + retention_days = get_retention_days(self.config) retention = datetime.timedelta(days=retention_days) deletion = dataset.uploaded + retention notes.append(f"Expected expiration date is {deletion:%Y-%m-%d}.") @@ -583,9 +578,7 @@ def _intake( Metadata.SERVER_BENCHMARK: benchmark, } ) - f = self._set_dataset_metadata(dataset, meta) - if f: - attributes["failures"] = f + self._set_dataset_metadata(dataset, meta, throw=True) except Exception as e: raise APIInternalError(f"Unable to set metadata: {e!s}") from e diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index 0da346a3f1..39812eeb31 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -26,6 +26,8 @@ RECLAIM_BYTES_PAD = 1024 # Pad unpack reclaim requests by this much MB_BYTES = 1024 * 1024 # Bytes in Mb +MAX_ERROR = 1024 * 5 # Maximum length of subprocess stderr to capture +TRUNC_PREFIX = "[TRUNC]" # Indicate that subprocess stderr was truncated class CacheManagerError(Exception): @@ -95,24 +97,35 @@ def __init__(self, tarball: Path, error: Exception): self.error = str(error) -class TarballUnpackError(CacheManagerError): +class UnpackBaseError(CacheManagerError): + """Base class for unpacking errors""" + + def __init__(self, context: Path, error: str, stderr: Optional[str] = None): + m = f"{error}: {stderr!r}" if stderr else error + super().__init__(m) + self.stderr = stderr + + +class TarballUnpackError(UnpackBaseError): """An error occurred trying to unpack a tarball.""" - def __init__(self, tarball: Path, error: str): - super().__init__(f"An error occurred while unpacking {tarball}: {error}") + def __init__(self, tarball: Path, error: str, stderr: Optional[str] = None): + super().__init__( + tarball, f"An error occurred while unpacking {tarball}: {error}", stderr + ) self.tarball = tarball - self.error = error -class TarballModeChangeError(CacheManagerError): +class TarballModeChangeError(UnpackBaseError): """An error occurred trying to fix unpacked tarball permissions.""" - def __init__(self, tarball: Path, error: str): + def __init__(self, directory: Path, error: str, stderr: Optional[str] = None): super().__init__( - f"An error occurred while changing file permissions of {tarball}: {error}" + directory, + f"An error occurred while changing file permissions of {directory}: {error}", + stderr, ) - self.tarball = tarball - self.error = error + self.directory = directory class CacheType(Enum): @@ -993,7 +1006,7 @@ def _get_metadata(tarball_path: Path) -> JSONOBJECT: def subprocess_run( command: str, working_dir: PathLike, - exception: type[CacheManagerError], + exception: type[UnpackBaseError], ctx: Path, ): """Runs a command as a subprocess. @@ -1024,9 +1037,13 @@ def subprocess_run( raise exception(ctx, str(exc)) from exc else: if process.returncode != 0: + msg = process.stderr + if len(msg) > MAX_ERROR: + msg = (TRUNC_PREFIX + msg)[:MAX_ERROR] raise exception( ctx, - f"{cmd[0]} exited with status {process.returncode}: {process.stderr.strip()!r}", + f"{cmd[0]} exited with status {process.returncode}", + stderr=msg, ) def get_unpacked_size(self) -> int: diff --git a/lib/pbench/server/database/models/server_settings.py b/lib/pbench/server/database/models/server_settings.py index d981ba3ee0..4cfbf95e7d 100644 --- a/lib/pbench/server/database/models/server_settings.py +++ b/lib/pbench/server/database/models/server_settings.py @@ -14,7 +14,7 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.sql.sqltypes import JSON -from pbench.server import JSONOBJECT, JSONVALUE +from pbench.server import JSONOBJECT, JSONVALUE, PbenchServerConfig from pbench.server.database.database import Database from pbench.server.database.models import decode_sql_error @@ -135,6 +135,28 @@ def validate_lifetime(key: str, value: JSONVALUE) -> JSONVALUE: ] +def get_retention_days(config: PbenchServerConfig) -> int: + """Get dataset lifetime + + This recognizes the server settings API as well as the static default if + for some reason we can't read the SQL database or the value we read can't + be converted to an integer. + + Args: + config: the Pbench Server configuration + + Returns: + integer days to expiration + """ + try: + return int(ServerSetting.get(OPTION_DATASET_LIFETIME)) + except Exception: + try: + return config.default_retention_period + except Exception: + return config.MAXIMUM_RETENTION_DAYS + + def validate_server_state(key: str, value: JSONVALUE) -> JSONVALUE: try: status = value[STATE_STATUS_KEY].lower() diff --git a/lib/pbench/server/indexing_tarballs.py b/lib/pbench/server/indexing_tarballs.py index 85d3172b79..58b39e62d8 100644 --- a/lib/pbench/server/indexing_tarballs.py +++ b/lib/pbench/server/indexing_tarballs.py @@ -632,8 +632,10 @@ def sighup_handler(*args): # Re-raise a SIGTERM to avoid it being lumped in with general # exception handling below. raise - except Exception: - idxctx.logger.exception(error_code["GENERIC_ERROR"].message) + except Exception as e: + idxctx.logger.exception( + "{}: {!r}", error_code["GENERIC_ERROR"].message, str(e) + ) res = error_code["GENERIC_ERROR"] else: # No exceptions while processing tar balls, success. diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index 146f29debf..b7c8b7c3e1 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -402,15 +402,31 @@ def mock_run(args, **_kwargs): msg = f"An error occurred while unpacking {my_dir}: Command '{my_command}' timed out after 43 seconds" assert str(exc.value) == msg - def test_tarball_subprocess_run_with_returncode(self, monkeypatch): + @pytest.mark.parametrize( + "errmsg,expected", + ( + ("Error unpacking", "Error unpacking"), + ("This message has 25 bytes", "This message has 25 bytes"), + ( + "This is a message we'll find too long", + "[TRUNC]This is a message ", + ), + ( + "One line\nTwo line\nThree line\nFour line\n", + "[TRUNC]One line\nTwo line\n", + ), + ), + ) + def test_tarball_subprocess_failure(self, monkeypatch, errmsg, expected): """Test to check the subprocess_run functionality of the Tarball when returncode value is not zero""" my_command = "mycommand" + monkeypatch.setattr("pbench.server.cache_manager.MAX_ERROR", 25) def mock_run(args, **_kwargs): assert args[0] == my_command return subprocess.CompletedProcess( - args, returncode=1, stdout=None, stderr="Some error unpacking tarball\n" + args, returncode=1, stdout=None, stderr=errmsg ) with monkeypatch.context() as m: @@ -422,7 +438,7 @@ def mock_run(args, **_kwargs): Tarball.subprocess_run(command, my_dir, TarballUnpackError, my_dir) msg = f"An error occurred while unpacking {my_dir.name}: {my_command} " - msg += "exited with status 1: 'Some error unpacking tarball'" + msg += f"exited with status 1: {expected!r}" assert str(exc.value) == msg def test_tarball_subprocess_run_success(self, monkeypatch): diff --git a/lib/pbench/test/unit/server/test_upload.py b/lib/pbench/test/unit/server/test_upload.py index fddee226c4..2465465dd7 100644 --- a/lib/pbench/test/unit/server/test_upload.py +++ b/lib/pbench/test/unit/server/test_upload.py @@ -848,8 +848,9 @@ def test_upload_metadata_error( ): """Test handling of post-intake error setting metadata - Cause Metadata.setvalue to fail. This should be reported in "failures" - without failing the upload. + Cause Metadata.setvalue to fail. This is an abnormal error (generally + a PostgreSQL server problem) and should result in a server internal + error. """ datafile, _, md5 = tarball @@ -867,12 +868,16 @@ def setvalue(dataset: Dataset, key: str, **_kwargs): headers=self.gen_headers(pbench_drb_token, md5), ) - assert response.status_code == HTTPStatus.CREATED + assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR audit = Audit.query() assert len(audit) == 2 - fails = audit[1].attributes["failures"] - assert isinstance(fails, dict) - assert fails["server.benchmark"].startswith("Metadata SQL error 'fake': ") + with pytest.raises(DatasetNotFound): + Dataset.query(resource_id=md5) + assert ( + audit[1] + .attributes["message"] + .startswith("Internal Pbench Server Error: log reference ") + ) @pytest.mark.freeze_time("1970-01-01") def test_upload_archive( diff --git a/server/Makefile b/server/Makefile index b8a0316dc1..89306786a5 100644 --- a/server/Makefile +++ b/server/Makefile @@ -22,6 +22,7 @@ INSTALLOPTS = --directory click-scripts = \ pbench-audit \ pbench-reindex \ + pbench-repair \ pbench-report-generator \ pbench-tree-manage \ pbench-user-create \ diff --git a/setup.cfg b/setup.cfg index 8be6d2b920..7aa620f84d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,6 +34,7 @@ console_scripts = pbench-list-triggers = pbench.cli.agent.commands.triggers.list:main pbench-register-tool-trigger = pbench.cli.agent.commands.triggers.register:main pbench-reindex = pbench.cli.server.reindex:reindex + pbench-repair = pbench.cli.server.repair:repair pbench-report-generator = pbench.cli.server.report:report pbench-results-move = pbench.cli.agent.commands.results.move:main pbench-results-push = pbench.cli.agent.commands.results.push:main