Improve client benchmark identification (#3496)
PBENCH-1186
PBENCH-1187

A bit of a catch-all. First, this simplifies and standardizes the way the
dashboard identifies a dataset's benchmark. Eventually we'll be "smart enough"
to look beyond the Pbench benchmark wrappers (as we intend to move away from
relying on them), at which point `dataset.metalog.pbench.script` will become
unreliable. This adds a new `server.benchmark` metadata key, which for now
carries the same value but can be extended in the future.
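
As a sketch of the intended client-side usage (assuming a connected
`PbenchServerClient` as `server_client` and a dataset resource ID in
`dataset_id`; `get_metadata` is the client call exercised in the functional
tests below):

    # Resolve the benchmark via the new key rather than parsing
    # metadata.log directly.
    metadata = server_client.get_metadata(dataset_id, ["server.benchmark"])
    benchmark = metadata["server.benchmark"]  # e.g. "uperf", or "unknown"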

The dashboard would also like to be able to determine, in advance, which
benchmark types the current server supports, and we'd like to avoid hardcoding
that knowledge in the dashboard. For this purpose, I've added a "visualization"
key to the endpoints response listing the benchmark types supported by the
current server's `datasets_visualize` and `datasets_compare` APIs.
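
A sketch of how a dashboard might consume this (the host and URI prefix are
placeholders; the response shape matches the endpoints diff and functional
test below):

    import requests

    # Fetch the server's endpoints description and read the advertised
    # benchmark types.
    endpoints = requests.get("https://pbench.example.com/api/v1/endpoints").json()
    supported = endpoints["visualization"]["benchmarks"]  # e.g. ["uperf"]
    enable_visualize_ui = "uperf" in supported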

For some reason, a minor consolidation of metadata-setting code in the intake
API base resulted in odd unit and functional test failures. The strange part is
that these should have been failing earlier, as they're cases where a dataset
doesn't have `server.index-map` but the API requires it (for example,
`datasets_detail` and `datasets_contents`). I twizzled the code a bit to fail
with a `CONFLICT` error (which had already been used elsewhere) rather than an
internal server error when the dataset is `server.archiveonly`, and adjusted
the tests to match.
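
A sketch of the resulting client-visible behavior, mirroring the functional
test changes below (`server_client` and a `dataset` object are assumed from
context):

    from http import HTTPStatus

    # Indexed-data APIs now report 409 CONFLICT for an archive-only
    # dataset instead of a 500 internal error.
    response = server_client.get(
        API.DATASETS_CONTENTS,
        {"dataset": dataset.resource_id, "target": ""},
        raise_error=False,
    )
    if response.status_code == HTTPStatus.CONFLICT:
        print(f"{dataset.name} is archive-only and has no index")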
dbutenhof committed Jul 17, 2023
1 parent e82943e commit 5329c0f
Showing 13 changed files with 132 additions and 48 deletions.
4 changes: 3 additions & 1 deletion jenkins/run-server-func-tests
@@ -142,7 +142,9 @@ else
fi

if [[ ${exit_status} -ne 0 ]]; then
dump_journal
if (( ${cleanup_flag} )); then
dump_journal
fi
printf -- "\nFunctional tests exited with code %s\n" ${exit_status} >&2
fi

12 changes: 9 additions & 3 deletions lib/pbench/client/__init__.py
@@ -494,14 +494,19 @@ def set_metadata(self, dataset_id: str, metadata: JSONOBJECT) -> JSONOBJECT:
).json()

def update(
self, dataset_id: str, access: Optional[str] = None, owner: Optional[str] = None
) -> JSONOBJECT:
self,
dataset_id: str,
access: Optional[str] = None,
owner: Optional[str] = None,
**kwargs,
) -> requests.Response:
"""Update the dataset access or owner
Args:
dataset_id: the resource ID of the targeted dataset
access: set the access mode of the dataset (private, public)
owner: set the owning username of the dataset (requires ADMIN)
kwargs: additional client options
Returns:
The requests.Response object
@@ -515,7 +520,8 @@ def update(
api=API.DATASETS,
uri_params={"dataset": dataset_id},
params=params,
).json()
**kwargs,
)

def get_settings(self, key: str = "") -> JSONOBJECT:
"""Return requested server setting.
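With this change a caller can forward client options and inspect the raw
response. A minimal sketch (assuming a connected client as `server_client`
and a dataset resource ID in `dataset_id`):

    # update() now returns the requests.Response rather than decoded JSON,
    # and forwards extra client options such as raise_error.
    response = server_client.update(dataset_id, access="public", raise_error=False)
    if not response.ok:
        print(f"update failed: {response.json()['message']}")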
4 changes: 2 additions & 2 deletions lib/pbench/server/api/resources/datasets_compare.py
@@ -70,7 +70,7 @@ def _get(
datasets = params.query.get("datasets")
benchmark_choice = None
for dataset in datasets:
benchmark = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
# Validate that all the selected datasets are of the same benchmark
if not benchmark_choice:
benchmark_choice = benchmark
@@ -116,5 +116,5 @@ def _get(
raise APIInternalError(
f"Quisby processing failure. Exception: {quisby_response['exception']}"
)
quisby_response["benchmark"] = benchmark.lower()
quisby_response["benchmark"] = benchmark
return jsonify(quisby_response)
7 changes: 3 additions & 4 deletions lib/pbench/server/api/resources/datasets_visualize.py
@@ -60,9 +60,8 @@ def _get(

dataset = params.uri["dataset"]

metadata = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
benchmark = metadata.upper()
benchmark_type = BenchmarkName.__members__.get(benchmark)
benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
benchmark_type = BenchmarkName.__members__.get(benchmark.upper())
if not benchmark_type:
raise APIAbort(
HTTPStatus.BAD_REQUEST, f"Unsupported Benchmark: {benchmark}"
@@ -84,5 +83,5 @@ def _get(
raise APIInternalError(
f"Quisby processing failure. Exception: {quisby_response['exception']}"
)
quisby_response["benchmark"] = benchmark.lower()
quisby_response["benchmark"] = benchmark
return jsonify(quisby_response)
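
The enum lookup above is case-insensitive by way of `upper()`. A quick
illustration (assuming pquisby is installed and, as the tests below suggest,
currently defines only UPERF):

    from pquisby.lib.post_processing import BenchmarkName

    # "uperf" matches a member; "hammerDB" does not, which is what
    # triggers the BAD_REQUEST above.
    assert BenchmarkName.__members__.get("UPERF") is not None
    assert BenchmarkName.__members__.get("HAMMERDB") is None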
4 changes: 4 additions & 0 deletions lib/pbench/server/api/resources/endpoint_configure.py
@@ -2,6 +2,7 @@
from urllib.parse import urljoin

from flask import current_app, jsonify, Request, Response
from pquisby.lib.post_processing import BenchmarkName

from pbench.server import OperationCode, PbenchServerConfig
from pbench.server.api.resources import (
@@ -122,6 +123,9 @@ def _get(self, args: ApiParams, request: Request, context: ApiContext) -> Response:
endpoints = {
"identification": f"Pbench server {self.server_config.COMMIT_ID}",
"uri": templates,
"visualization": {
"benchmarks": [m.lower() for m in BenchmarkName.__members__.keys()]
},
}

client = self.server_config.get("openid", "client")
39 changes: 25 additions & 14 deletions lib/pbench/server/api/resources/intake_base.py
@@ -419,15 +419,22 @@
# Add the processed tarball metadata.log file contents, if any.
#
# If we were unable to find or parse the tarball's metadata.log
# file, construct a minimal metadata context identifying the
# dataset as a "foreign" benchmark script, and disable indexing,
# which requires the metadata.
# file, we construct a minimal metadata context identifying the
# dataset as an unknown benchmark script, and we disable indexing,
# which requires result metadata.
try:
metalog = tarball.metadata
if not metalog:
metalog = {"pbench": {"script": "Foreign"}}
benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN
metalog = {"pbench": {"name": dataset.name, "script": benchmark}}
metadata[Metadata.SERVER_ARCHIVE] = True
attributes["missing_metadata"] = True
else:
p = metalog.get("pbench")
if p:
benchmark = p.get("script", Metadata.SERVER_BENCHMARK_UNKNOWN)
else:
benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN
Metadata.create(dataset=dataset, key=Metadata.METALOG, value=metalog)
except Exception as exc:
raise APIInternalError(
@@ -447,17 +454,21 @@
try:
retention = datetime.timedelta(days=retention_days)
deletion = dataset.uploaded + retention
Metadata.setvalue(
dataset=dataset,
key=Metadata.TARBALL_PATH,
value=str(tarball.tarball_path),
)
Metadata.setvalue(
dataset=dataset,
key=Metadata.SERVER_DELETION,
value=UtcTimeHelper(deletion).to_iso_string(),

# Make a shallow copy so we can add keys without affecting the
# original (which will be recorded in the audit log)
meta = metadata.copy()
meta.update(
{
Metadata.TARBALL_PATH: str(tarball.tarball_path),
Metadata.SERVER_DELETION: UtcTimeHelper(
deletion
).to_iso_string(),
Metadata.SERVER_BENCHMARK: benchmark,
}
)
f = self._set_dataset_metadata(dataset, metadata)
current_app.logger.info("Metadata for {}: {}", dataset.name, meta)
f = self._set_dataset_metadata(dataset, meta)
if f:
attributes["failures"] = f
except Exception as e:
10 changes: 7 additions & 3 deletions lib/pbench/server/api/resources/query_apis/datasets/__init__.py
@@ -1,9 +1,11 @@
from http import HTTPStatus
from typing import AnyStr, List, NoReturn, Union

from flask import current_app

from pbench.server import JSON, PbenchServerConfig
from pbench.server.api.resources import (
APIAbort,
ApiAuthorizationType,
ApiContext,
APIInternalError,
@@ -119,11 +121,13 @@ def get_index(self, dataset: Dataset, root_index_name: AnyStr) -> AnyStr:
try:
index_map = Metadata.getvalue(dataset=dataset, key=Metadata.INDEX_MAP)
except MetadataError as exc:
current_app.logger.error("{}", exc)
raise APIInternalError(f"Required metadata {Metadata.INDEX_MAP} missing")
if Metadata.getvalue(dataset, Metadata.SERVER_ARCHIVE):
raise APIAbort(HTTPStatus.CONFLICT, "Dataset is not indexed")
raise APIInternalError(
f"Required metadata {Metadata.INDEX_MAP} missing"
) from exc

if index_map is None:
current_app.logger.error("Index map metadata has no value")
raise APIInternalError(
f"Required metadata {Metadata.INDEX_MAP} has no value"
)
7 changes: 7 additions & 0 deletions lib/pbench/server/database/models/datasets.py
@@ -612,6 +612,13 @@ class Metadata(Database.Base):
METALOG = "metalog"
NATIVE_KEYS = [GLOBAL, METALOG, SERVER, USER]

# BENCHMARK provides a simple identification of the native benchmark where
# one can be identified. At its simplest, for Pbench Agent tarballs, this is
# just "dataset.metalog.pbench.script", but it will be expanded in the future,
# for example to identify a "pbench-user-benchmark -- fio" run as "fio".
SERVER_BENCHMARK = "server.benchmark"
SERVER_BENCHMARK_UNKNOWN = "unknown"

# DELETION timestamp for dataset based on user settings and system
# settings when the dataset is created.
#
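Server code can then read the key uniformly, as `datasets_compare` and
`datasets_visualize` now do; a defensive sketch (`dataset` assumed in scope):

    benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
    if benchmark == Metadata.SERVER_BENCHMARK_UNKNOWN:
        ...  # no native benchmark could be identified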
1 change: 1 addition & 0 deletions lib/pbench/test/functional/server/test_connect.py
@@ -14,6 +14,7 @@ def test_connect(self, server_client: PbenchServerClient):
assert endpoints
assert "identification" in endpoints
assert "uri" in endpoints
assert endpoints["visualization"]["benchmarks"] == ["uperf"]

# Verify that all expected endpoints are reported
for a in endpoints["uri"].keys():
85 changes: 66 additions & 19 deletions lib/pbench/test/functional/server/test_datasets.py
@@ -79,7 +79,13 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user):
print(f"\t... uploaded {name}: {a}")

datasets = server_client.get_list(
metadata=["dataset.access", "server.tarball-path", "dataset.operations"]
metadata=[
"dataset.access",
"dataset.metalog.pbench.script",
"server.benchmark",
"server.tarball-path",
"dataset.operations",
]
)
found = frozenset({d.name for d in datasets})
expected = frozenset(tarballs.keys())
@@ -91,6 +97,10 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user):
t = tarballs[dataset.name]
assert dataset.name in dataset.metadata["server.tarball-path"]
assert dataset.metadata["dataset.operations"]["UPLOAD"]["state"] == "OK"
assert (
dataset.metadata["dataset.metalog.pbench.script"]
== dataset.metadata["server.benchmark"]
)
assert t.access == dataset.metadata["dataset.access"]
except HTTPError as exc:
pytest.fail(
@@ -169,9 +179,17 @@ def test_archive_only(server_client: PbenchServerClient, login_user):
API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
)
metadata = server_client.get_metadata(
md5, ["dataset.operations", "server.archiveonly"]
md5,
[
"dataset.metalog.pbench.script",
"dataset.operations",
"server.archiveonly",
"server.benchmark",
],
)
assert metadata["dataset.metalog.pbench.script"] == "fio"
assert metadata["server.archiveonly"] is True
assert metadata["server.benchmark"] == "fio"

# NOTE: we could wait here; however, the UNPACK operation is only
# enabled by upload, and INDEX is only enabled by UNPACK: so if they're
@@ -209,9 +227,18 @@ def test_no_metadata(server_client: PbenchServerClient, login_user):
API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
)
metadata = server_client.get_metadata(
md5, ["dataset.operations", "dataset.metalog", "server.archiveonly"]
md5,
[
"dataset.operations",
"dataset.metalog",
"server.archiveonly",
"server.benchmark",
],
)
assert metadata["dataset.metalog"] == {"pbench": {"script": "Foreign"}}
assert metadata["dataset.metalog"] == {
"pbench": {"name": name, "script": "unknown"}
}
assert metadata["server.benchmark"] == "unknown"
assert metadata["server.archiveonly"] is True

# NOTE: we could wait here; however, the UNPACK operation is only
@@ -361,17 +388,21 @@ def test_details(self, server_client: PbenchServerClient, login_user):
)
for d in datasets:
print(f"\t... checking run index for {d.name}")
indexed = not d.metadata["server.archiveonly"]
response = server_client.get(
API.DATASETS_DETAIL, {"dataset": d.resource_id}
API.DATASETS_DETAIL, {"dataset": d.resource_id}, raise_error=False
)
detail = response.json()
indexed = not d.metadata["server.archiveonly"]
if indexed:
assert (
response.ok
), f"DETAILS for {d.name} failed with {detail['message']}"
assert (
d.metadata["dataset.metalog.pbench"]["script"]
== detail["runMetadata"]["script"]
)
else:
assert response.status_code == HTTPStatus.CONFLICT
print(f"\t\t... {d.name} is archiveonly")


@@ -554,8 +585,15 @@ def test_contents(self, server_client: PbenchServerClient, login_user):

for dataset in datasets:
response = server_client.get(
API.DATASETS_CONTENTS, {"dataset": dataset.resource_id, "target": ""}
API.DATASETS_CONTENTS,
{"dataset": dataset.resource_id, "target": ""},
raise_error=False,
)
archive = dataset.metadata["server.archiveonly"]
if archive:
assert response.status_code == HTTPStatus.CONFLICT
return

assert (
response.ok
), f"CONTENTS {dataset.name} failed {response.status_code}:{response.json()['message']}"
Expand All @@ -565,15 +603,14 @@ def test_contents(self, server_client: PbenchServerClient, login_user):
# directory is technically possible, but not legal unless it's a
# trivial "archiveonly" dataset. NOTE: this will also fail if
# either the "directories" or "files" JSON keys are missing.
archive = dataset.metadata["server.archiveonly"]
assert json["directories"] or json["files"] or archive
assert json["directories"] or json["files"]

# Even if they're empty, both values must be lists
assert isinstance(json["directories"], list)
assert isinstance(json["files"], list)

# Unless archiveonly, we need at least a metadata.log
assert archive or "metadata.log" in (f["name"] for f in json["files"])
# We need at least a metadata.log
assert "metadata.log" in (f["name"] for f in json["files"])

for d in json["directories"]:
uri = server_client._uri(
@@ -598,7 +635,7 @@ def test_visualize(self, server_client: PbenchServerClient, login_user):
"""
datasets = server_client.get_list(
owner="tester",
filter=["dataset.metalog.pbench.script:uperf"],
filter=["server.benchmark:uperf"],
)

for dataset in datasets:
@@ -624,7 +661,7 @@ def test_compare(self, server_client: PbenchServerClient, login_user):
"""
datasets = server_client.get_list(
owner="tester",
filter=["dataset.metalog.pbench.script:uperf"],
filter=["server.benchmark:uperf"],
)

candidates = [dataset.resource_id for dataset in datasets]
@@ -686,15 +723,25 @@ class TestUpdate:
@pytest.mark.parametrize("access", ("public", "private"))
def test_publish(self, server_client: PbenchServerClient, login_user, access):
expected = "public" if access == "private" else "private"
datasets = server_client.get_list(access=access, mine="true")
datasets = server_client.get_list(
access=access, mine="true", metadata=["server.archiveonly"]
)
print(f" ... updating {access} datasets to {expected} ...")
for dataset in datasets:
server_client.update(dataset.resource_id, access=expected)
print(f"\t ... updating {dataset.name} to {access!r}")
meta = server_client.get_metadata(
dataset.resource_id, metadata="dataset.access"
response = server_client.update(
dataset.resource_id, access=expected, raise_error=False
)
assert meta["dataset.access"] == expected
print(f"\t ... updating {dataset.name} to {access!r}")
if response.ok:
assert not dataset.metadata["server.archiveonly"]
meta = server_client.get_metadata(
dataset.resource_id, metadata="dataset.access"
)
assert meta["dataset.access"] == expected
else:
assert dataset.metadata[
"server.archiveonly"
], f"Indexed dataset {dataset.name} failed to update with {response.json()['message']}"


class TestDelete:
2 changes: 1 addition & 1 deletion lib/pbench/test/unit/server/test_datasets_visualize.py
@@ -130,5 +130,5 @@ def mock_get_metadata(_d: Dataset, _k: str, _u: Optional[User] = None) -> JSON:
monkeypatch.setattr(Metadata, "getvalue", mock_get_metadata)
monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
response = query_get_as("uperf_1", "test", HTTPStatus.BAD_REQUEST)
assert response.json["message"] == "Unsupported Benchmark: HAMMERDB"
assert response.json["message"] == "Unsupported Benchmark: hammerDB"
assert extract_not_called
1 change: 1 addition & 0 deletions lib/pbench/test/unit/server/test_endpoint_configure.py
@@ -54,6 +54,7 @@ def check_config(self, client, server_config, host, my_headers={}):
uri = urljoin(host, uri_prefix)
expected_results = {
"identification": f"Pbench server {server_config.COMMIT_ID}",
"visualization": {"benchmarks": ["uperf"]},
"uri": {
"datasets": {
"template": f"{uri}/datasets/{{dataset}}",
