Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions lib/pbench/server/api/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json
from json.decoder import JSONDecodeError
from logging import Logger
from typing import Any, Callable, List, Union
from typing import Any, Callable, List, Optional, Union

from dateutil import parser as date_parser
from flask import request
Expand Down Expand Up @@ -1021,8 +1021,12 @@ def _get_dataset_metadata(
elif i == Dataset.UPLOADED:
metadata[i] = UtcTimeHelper(dataset.uploaded).to_iso_string()
elif Metadata.is_key_path(i, Metadata.USER_METADATA):
native_key = Metadata.get_native_key(i)
user: Optional[User] = None
if native_key == Metadata.USER_NATIVE_KEY:
user = Auth.token_auth.current_user()
try:
metadata[i] = Metadata.getvalue(dataset=dataset, key=i)
metadata[i] = Metadata.getvalue(dataset=dataset, key=i, user=user)
except MetadataNotFound:
metadata[i] = None
else:
Expand Down
44 changes: 36 additions & 8 deletions lib/pbench/server/api/resources/datasets_metadata.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from http import HTTPStatus
from logging import Logger
from typing import Optional

from flask.json import jsonify
from flask.wrappers import Request, Response

from pbench.server import JSON, JSONOBJECT, PbenchServerConfig
from pbench.server.api.auth import Auth
from pbench.server.api.resources import (
APIAbort,
API_OPERATION,
Expand All @@ -19,6 +21,7 @@
Metadata,
MetadataError,
)
from pbench.server.database.models.users import User


class DatasetsMetadata(ApiBase):
Expand Down Expand Up @@ -94,10 +97,7 @@ def _put(self, json_data: JSONOBJECT, _) -> Response:
"name": "datasetname",
"metadata": [
"dashboard.seen": True,
"user": {
"cloud": "AWS",
"contact": "john.carter@mars.org"
}
"user": {"favorite": True}
]
}

Expand All @@ -111,7 +111,7 @@ def _put(self, json_data: JSONOBJECT, _) -> Response:

[
"dashboard.seen": True,
"user": {"cloud": "AWS", "contact": "json.carter@mars.org}
"user": {"favorite": False}
]
"""
name = json_data["dataset"]
Expand All @@ -125,16 +125,44 @@ def _put(self, json_data: JSONOBJECT, _) -> Response:

# Validate the authenticated user's authorization for the combination
# of "owner" and "access".
self._check_authorization(str(dataset.owner_id), dataset.access)
#
# The "unusual" part here is that we make a special case for
# authenticated users that are not the owner of the data: we want to allow
# them UPDATE access to PUBLIC datasets (to which they naturally have
# READ access) as long as they're only trying to modify a "user."
# metadata key:
#
# * We want to validate authorized non-owners for READ access if
# they're only trying to modify "user." keys;
# * We want to validate unauthenticated users for UPDATE because they have
# READ access to "public" datasets but still can't write even "user."
# metadata;
# * We want to validate authorized users for UPDATE if they're trying
# to set anything other than a "user." key because only the owner can
# do that...
role = API_OPERATION.READ
if not Auth.token_auth.current_user():
role = API_OPERATION.UPDATE
else:
for k in metadata.keys():
if Metadata.get_native_key(k) != Metadata.USER_NATIVE_KEY:
role = API_OPERATION.UPDATE
self._check_authorization(
str(dataset.owner_id), dataset.access, check_role=role
)

failures = []
for k, v in metadata.items():
native_key = Metadata.get_native_key(k)
user: Optional[User] = None
if native_key == Metadata.USER_NATIVE_KEY:
user = Auth.token_auth.current_user()
try:
Metadata.setvalue(dataset, k, v)
Metadata.setvalue(key=k, value=v, dataset=dataset, user=user)
except MetadataError as e:
self.logger.warning("Unable to update key {} = {!r}: {}", k, v, str(e))
failures.append(k)
if failures:
raise APIAbort(HTTPStatus.INTERNAL_SERVER_ERROR)
results = self._get_metadata(name, list(metadata.keys()))
results = self._get_dataset_metadata(dataset, list(metadata.keys()))
return jsonify(results)
151 changes: 85 additions & 66 deletions lib/pbench/server/database/models/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import os
from pathlib import Path
import re
from typing import Any, List, Tuple, Union
from typing import Any, List, Optional, Tuple, Union

from sqlalchemy import Column, DateTime, Enum, event, ForeignKey, Integer, JSON, String
from sqlalchemy.exc import IntegrityError, SQLAlchemyError
from sqlalchemy.orm import relationship, validates
from sqlalchemy.orm import relationship, validates, Query
from sqlalchemy.types import TypeDecorator

from pbench.server.database.database import Database
Expand Down Expand Up @@ -821,57 +821,44 @@ class Metadata(Database.Base):
# the "server" namespace, and are strictly controlled by keyword path:
# e.g., "server.deleted", "server.archived";
#
# Metadata keys intended for use by the dashboard client are in the
# "dashboard" namespace. While these can be modified by any API client, the
# separate namespace provides some protection against accidental
# modifications that might break dashboard semantics. This is an "open"
# namespace, allowing the dashboard to define and manage any keys it needs
# within the "dashboard.*" hierarchy.
#
# Metadata keys within the "user" namespace are reserved for general client
# use, although by convention a second-level key-space should be used to
# provide some isolation. This is an "open" namespace, allowing any API
# client to define new keywords such as "user.contact" or "user.me.you"
# and JSON subdocuments are available at any level. For example, if we've
# set "user.contact.email" and "user.postman.test" then retrieving "user"
# will return a JSON document like
# {"contact": {"email": "value"}, "postman": {"test": "value"}}
# and retrieving "user.contact" would return
# {"email": "value"}
# The "dashboard" and "user" namespaces can be written by an authenticated
# client to track external metadata. The difference is that "dashboard" key
# values are visible to all clients with READ access to the dataset, while
# the "user" namespace is visible only to clients authenticated to the user
# that wrote the data. That is, "dashboard.seen" is a global dashboard
# metadata property visible to all users, while "user.favorite" is visible
# only to the specific user that wrote the value; each authenticated user
# may have its own unique "user.favorite" value.
#
# The following class constants define the set of currently available
# metadata keys, where the "open" namespaces are represented by the
# syntax "user.*".

# DELETION timestamp for dataset based on user settings and system
# settings at time the dataset is created.
#
# {"server.deletion": "2021-12-25"}
DELETION = "server.deletion"

# DASHBOARD is arbitrary data saved on behalf of the dashboard client; the
# reserved namespace differs from "user" only in that reserving a primary
# key for the "well known" dashboard client offers some protection against
# key name collisions.
# syntax "dashboard.*" and "user.*" which allow clients to control the
# key names using a "dotted path" notation like "dashboard.seen" or
# "dashboard.contact.email".

# DASHBOARD is arbitrary data saved on behalf of the dashboard client as a
# JSON document. Writing these keys requires ownership of the referenced
# dataset, and the data is visible to all clients with READ access to the
# dataset.
#
# {"dashboard.seen": True}
DASHBOARD = "dashboard.*"

# USER is arbitrary data saved on behalf of the owning user, as a JSON
# document.
#
# Note that the hierarchical key management in the getvalue and setvalue
# static methods (used consistently by the API layer) allow client code
# to interact with these either as a complete JSON document ("user") or
# as a full dotted key path ("user.contact.name.first") or at any JSON
# layer between.
# USER is arbitrary data saved with the dataset on behalf of an
# authenticated user, as a JSON document. Writing these keys requires READ
# access to the referenced dataset, and the values are visible only to
# clients that are authenticated as the user which set them. Each user can
# have its own unique value for these keys, for example "user.favorite".
#
# API keyword validation uses the trailing ".*" here to indicate that only
# the first element of the path should be validated, allowing the client
# a completely uninterpreted namespace below that.
# {"user.favorite": True}
USER_NATIVE_KEY = "user"
USER = USER_NATIVE_KEY + ".*"

# DELETION timestamp for dataset based on user settings and system
# settings when the dataset is created.
#
# {"user.cloud": "AWS", "user.mood": "CLOUDY"}}
USER = "user.*"
# {"server.deletion": "2021-12-25"}
DELETION = "server.deletion"

# REINDEX boolean flag to indicate when a dataset should be re-indexed
#
Expand Down Expand Up @@ -928,8 +915,10 @@ class Metadata(Database.Base):
key = Column(String(255), unique=False, nullable=False, index=True)
value = Column(JSON, unique=False, nullable=True)
dataset_ref = Column(Integer, ForeignKey("datasets.id"), nullable=False)
user_ref = Column(Integer, ForeignKey("users.id"), nullable=True)

dataset = relationship("Dataset", back_populates="metadatas", single_parent=True)
user = relationship("User", back_populates="dataset_metadata", single_parent=True)

@validates("key")
def validate_key(self, _, value: Any) -> str:
Expand Down Expand Up @@ -980,6 +969,19 @@ def create(**kwargs) -> "Metadata":
else:
return meta

@staticmethod
def get_native_key(key: str) -> str:
"""
Extract the root key name

Args:
key: Key path (e.g., "user.tag")

Returns:
native SQL key name ("user")
"""
return key.lower().split(".")[0]

@staticmethod
def is_key_path(key: str, valid: List[str]) -> bool:
"""
Expand All @@ -988,7 +990,7 @@ def is_key_path(key: str, valid: List[str]) -> bool:
valid. If the key is a dotted path and the first element plus a
trailing ".*" is in the list, then this is an open key namespace where
any subsequent path is acceptable: e.g., "user.*" allows "user", or
"user.contact", "user.contact.name", etc.
"user.favorite", "user.notes.status", etc.

Args:
key: metadata key path
Expand All @@ -1012,7 +1014,7 @@ def is_key_path(key: str, valid: List[str]) -> bool:
return bool(re.fullmatch(Metadata._valid_key_charset, k))

@staticmethod
def getvalue(dataset: Dataset, key: str) -> JSON:
def getvalue(dataset: Dataset, key: str, user: Optional[User] = None) -> JSON:
"""
Returns the value of the specified key, which may be a dotted
hierarchical path (e.g., "server.deleted").
Expand All @@ -1021,15 +1023,28 @@ def getvalue(dataset: Dataset, key: str) -> JSON:
level Metadata object. The full JSON value of a top level key can be
acquired directly using `Metadata.get(dataset, key)`

E.g., for "user.contact.name" with the dataset's Metadata value for the
"user" key as {"contact": {"name": "Dave", "email": "d@example.com"}},
this would return "Dave", whereas Metadata.get(dataset, "user") would
return the entire user key JSON, such as
{"user" {"contact": {"name": "Dave", "email": "d@example.com}}}
For example, if the metadata database has

"dashboard": {
"contact": {
"name": "dave",
"email": "d@example.com"
}
}

then Metadata.getvalue(dataset, "dashboard.contact.name") would return

"dave"

whereas Metadata.getvalue(dataset, "dashboard") would return the entire
dashboard key JSON, such as

{"contact": {"name": "dave", "email": "d@example.com"}}

Args:
dataset: associated dataset
key: hierarchical key path to fetch
user: User-specific key value (used only for "user." namespace)

Returns:
Value of the key path
Expand All @@ -1039,7 +1054,7 @@ def getvalue(dataset: Dataset, key: str) -> JSON:
keys = key.lower().split(".")
native_key = keys.pop(0)
try:
meta = Metadata.get(dataset, native_key)
meta = Metadata.get(dataset, native_key, user)
except MetadataNotFound:
return None
value = meta.value
Expand All @@ -1057,7 +1072,9 @@ def getvalue(dataset: Dataset, key: str) -> JSON:
return value

@staticmethod
def setvalue(dataset: Dataset, key: str, value: Any) -> "Metadata":
def setvalue(
dataset: Dataset, key: str, value: Any, user: Optional[User] = None
) -> "Metadata":
"""
Create or modify an existing metadata value. This method supports
hierarchical dotted paths like "dashboard.seen" and should be used in
Expand All @@ -1083,7 +1100,7 @@ def setvalue(dataset: Dataset, key: str, value: Any) -> "Metadata":
native_key = keys.pop(0)
found = True
try:
meta = Metadata.get(dataset, native_key)
meta = Metadata.get(dataset, native_key, user)

# SQLAlchemy determines whether to perform an `update` based on the
# Python object reference. We make a copy here to ensure that it
Expand Down Expand Up @@ -1120,11 +1137,19 @@ def setvalue(dataset: Dataset, key: str, value: Any) -> "Metadata":
meta.value = meta_value
meta.update()
else:
meta = Metadata.create(dataset=dataset, key=native_key, value=meta_value)
meta = Metadata.create(
dataset=dataset, key=native_key, value=meta_value, user=user
)
return meta

@staticmethod
def _query(dataset: Dataset, key: str, user: Optional[User]) -> Query:
    """
    Build the SQLAlchemy Query selecting Metadata rows matching the
    given dataset, native key name, and user.

    Callers pass user=None for keys outside the "user." namespace, so
    the filter then matches rows with no associated user; a real User
    scopes the lookup to that user's private "user." metadata row.

    Args:
        dataset: associated dataset
        key: native (top-level) metadata key name
        user: owning user for "user." namespace rows, else None

    Returns:
        an un-executed Query for the matching Metadata row(s)
    """
    return Database.db_session.query(Metadata).filter_by(
        dataset=dataset, key=key, user=user
    )
)

@staticmethod
def get(dataset: Dataset, key: str, user: Optional[User] = None) -> "Metadata":
"""
Fetch a Metadata (row) from the database by key name.

Expand All @@ -1140,11 +1165,7 @@ def get(dataset: Dataset, key: str) -> "Metadata":
The Metadata model object
"""
try:
meta = (
Database.db_session.query(Metadata)
.filter_by(dataset=dataset, key=key)
.first()
)
meta = __class__._query(dataset, key, user).first()
except SQLAlchemyError as e:
Metadata.logger.exception("Can't get {}>>{} from DB", dataset, key)
raise MetadataSqlError("getting", dataset, key) from e
Expand All @@ -1154,7 +1175,7 @@ def get(dataset: Dataset, key: str) -> "Metadata":
return meta

@staticmethod
def remove(dataset: Dataset, key: str):
def remove(dataset: Dataset, key: str, user: Optional[User] = None):
"""
remove Remove a metadata key from the dataset

Expand All @@ -1166,9 +1187,7 @@ def remove(dataset: Dataset, key: str):
DatasetSqlError: Something went wrong
"""
try:
Database.db_session.query(Metadata).filter_by(
dataset=dataset, key=key
).delete()
__class__._query(dataset, key, user).delete()
Database.db_session.commit()
except SQLAlchemyError as e:
Metadata.logger.exception("Can't remove {}>>{} from DB", dataset, key)
Expand All @@ -1185,7 +1204,7 @@ def add(self, dataset: Dataset):
raise DatasetBadParameterType(dataset, Dataset)

try:
Metadata.get(dataset, self.key)
Metadata.get(dataset, self.key, self.user)
except MetadataNotFound:
pass
else:
Expand Down
Loading