Merged

Changes from all commits (17 commits)
40cc7e5
Added 3 new properties to the dataset section in provenance_schema.ya…
DerekFurstPitt Sep 14, 2021
10f2521
Added initial implementation of endpoint /datasets/<id>/revisions
DerekFurstPitt Sep 20, 2021
91c3f98
Incremental update. work in progress changes to /datasets/<id>/revisi…
DerekFurstPitt Sep 20, 2021
9523a74
Complete implementation of datasets/<id>/revisions minus authorizatio…
DerekFurstPitt Sep 21, 2021
b707a70
Added authorization code. Now after the initial check that the given …
DerekFurstPitt Sep 21, 2021
fbbc3ba
Reverting a number of small changes that were only necessary for loca…
DerekFurstPitt Sep 22, 2021
372f89e
Merge remote-tracking branch 'origin/test-release' into Derek-Furst/d…
DerekFurstPitt Sep 22, 2021
b76f7fb
Initial implementation of datasets/<id>/retract
DerekFurstPitt Sep 22, 2021
0643f55
Separating revisions and retract methods into 2 separate branches. Th…
DerekFurstPitt Sep 23, 2021
381091f
Added some lines to further normalize the outgoing json. Still a work…
DerekFurstPitt Sep 23, 2021
6786bc1
Modified provenance schema and app.py and schema_triggers to use trig…
DerekFurstPitt Sep 24, 2021
3acc46c
Dataset retraction tweaks
yuanzhou Sep 27, 2021
b2a1ff9
Sync with test-release
yuanzhou Sep 27, 2021
e29fccd
Add schema validators in API call
yuanzhou Sep 27, 2021
42aae30
Add validator to allow retraction on Published dataset only
yuanzhou Sep 27, 2021
f4ad9da
Pass in request instead of request.headers for schema validators
yuanzhou Sep 28, 2021
0ec6050
Remove unnecessary application-level check
yuanzhou Sep 28, 2021
97 changes: 90 additions & 7 deletions src/app.py
@@ -102,10 +102,9 @@ def http_internal_server_error(e):
 # This neo4j_driver_instance will be used for application-specific neo4j queries
 # as well as being passed to the schema_manager
 try:
-    neo4j_driver_instance = neo4j_driver.instance(app.config['NEO4J_URI'],
-                                                  app.config['NEO4J_USERNAME'],
+    neo4j_driver_instance = neo4j_driver.instance(app.config['NEO4J_URI'],
+                                                  app.config['NEO4J_USERNAME'],
                                                   app.config['NEO4J_PASSWORD'])
-
     logger.info("Initialized neo4j_driver module successfully :)")
 except Exception:
     msg = "Failed to initialize the neo4j_driver module"
@@ -794,7 +793,7 @@ def create_entity(entity_type):
     # Execute entity level validator defined in schema yaml before entity creation
     # Currently only Dataset and Upload creation require an application header
     try:
-        schema_manager.execute_entity_level_validator('before_entity_create_validator', normalized_entity_type, request.headers)
+        schema_manager.execute_entity_level_validator('before_entity_create_validator', normalized_entity_type, request)
     except schema_errors.MissingApplicationHeaderException as e:
         bad_request_error(e)
     except schema_errors.InvalidApplicationHeaderException as e:
@@ -997,7 +996,7 @@ def update_entity(id):
 
     # Execute property level validators defined in schema yaml before entity property update
     try:
-        schema_manager.execute_property_level_validators('before_property_update_validators', normalized_entity_type, request.headers, entity_dict, json_data_dict)
+        schema_manager.execute_property_level_validators('before_property_update_validators', normalized_entity_type, request, entity_dict, json_data_dict)
     except (schema_errors.MissingApplicationHeaderException,
             schema_errors.InvalidApplicationHeaderException,
             KeyError,
@@ -1454,7 +1453,7 @@ def get_previous_revisions(id):
     # Get user token from Authorization header
     user_token = get_user_token(request)
 
-    # Make sure the id exists in uuid-api and 
+    # Make sure the id exists in uuid-api and
     # the corresponding entity also exists in neo4j
     entity_dict = query_target_entity(id, user_token)
     uuid = entity_dict['uuid']
@@ -1954,14 +1953,98 @@ def get_dataset_revision_number(id):
     return jsonify(revision_number)
 
 
+"""
+Retract a published dataset with a retraction reason and sub status
+
+Takes as input a json body with required fields "retraction_reason" and "sub_status".
+Authorization is handled by the gateway. Only a token of the HuBMAP-Data-Admin group can use this call.
+
+Technically, the same can be achieved by making a PUT call to the generic entity update endpoint
+using a HuBMAP-Data-Admin group token. But doing this is strongly discouraged because we would
+need to add more validators to ensure that when "retraction_reason" is provided, there must be a
+"sub_status" field and vice versa. So consider this call a special use case of entity update.
+
+Parameters
+----------
+id : str
+    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target dataset
+
+Returns
+-------
+dict
+    The updated dataset details
+"""
+@app.route('/datasets/<id>/retract', methods=['PUT'])
+def retract_dataset(id):
+    # Always expect a json body
+    require_json(request)
+
+    # Parse incoming json string into json data (python dict object)
+    json_data_dict = request.get_json()
+
+    # Use the application-level validations below to avoid complicating schema validators.
+    # 'retraction_reason' and 'sub_status' are the only required/allowed fields; no other fields are allowed.
+    # Must enforce this rule, otherwise we would need to run after-update triggers if any other fields
+    # got passed in (updating other fields should be done using the generic entity update call)
+    if 'retraction_reason' not in json_data_dict:
+        bad_request_error("Missing required field: retraction_reason")
+
+    if 'sub_status' not in json_data_dict:
+        bad_request_error("Missing required field: sub_status")
+
+    if len(json_data_dict) > 2:
+        bad_request_error("Only retraction_reason and sub_status are allowed fields")
+
+    # Must be a HuBMAP-Data-Admin group token
+    token = get_user_token(request)
+
+    # Retrieve the neo4j data for the entity with the supplied id.
+    # The normalized entity type of this entity is then checked to be Dataset:
+    # if the entity is not a dataset, or the dataset is not published, it cannot be retracted
+    entity_dict = query_target_entity(id, token)
+    normalized_entity_type = entity_dict['entity_type']
+
+    # A bit more application-level validation
+    if normalized_entity_type != 'Dataset':
+        bad_request_error("The entity of given id is not a Dataset")
+
+    # Validate request json against the yaml schema
+    # The given value of 'sub_status' is validated at this step
+    try:
+        schema_manager.validate_json_data_against_schema(json_data_dict, normalized_entity_type, existing_entity_dict = entity_dict)
+    except schema_errors.SchemaValidationException as e:
+        # No need to log the validation errors
+        bad_request_error(str(e))
+
+    # Execute property level validators defined in schema yaml before entity property update
+    try:
+        schema_manager.execute_property_level_validators('before_property_update_validators', normalized_entity_type, request, entity_dict, json_data_dict)
+    except (schema_errors.MissingApplicationHeaderException,
+            schema_errors.InvalidApplicationHeaderException,
+            KeyError,
+            ValueError) as e:
+        bad_request_error(e)
+
+    # No need to call after_update() afterwards because retraction doesn't trigger any after_update_trigger methods
+    merged_updated_dict = update_entity_details(request, normalized_entity_type, token, json_data_dict, entity_dict)
+
+    complete_dict = schema_manager.get_complete_entity_result(token, merged_updated_dict)
+
+    # Will also filter the result based on schema
+    normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict)
+
+    # Also reindex the updated entity node in elasticsearch via search-api
+    reindex_entity(entity_dict['uuid'], token)
+
+    return jsonify(normalized_complete_dict)
+
 """
 Retrieve a list of all revisions of a dataset from the id of any dataset in the chain.
 E.g. if there are 5 revisions, and the id for revision 4 is given, a list of revisions
 1-5 will be returned in reverse order (newest first). Non-public access is only required to
 retrieve information on non-published datasets. Output will be a list of dictionaries; each
 dictionary contains the dataset revision number, its uuid, and optionally the complete dataset.
 """
 
 @app.route('/datasets/<id>/revisions', methods=['GET'])
 def get_revisions_list(id):
     # Token is not required, but if an invalid token provided,
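For reference, here is a hedged client-side sketch of how the two endpoints above might be called. It is not part of this PR: the base URL, dataset ID, and token are placeholders, and only the request/response contract follows the docstrings.

```python
# Hypothetical client sketch (not from this PR). BASE_URL, the dataset ID and
# the token are assumptions; the field names follow the endpoint docstrings.
import requests

BASE_URL = 'https://entity.api.hubmapconsortium.org'  # assumed deployment URL
HEADERS = {'Authorization': 'Bearer <HuBMAP-Data-Admin group token>'}

# Retract a published dataset: exactly these two fields, no others allowed
resp = requests.put(f'{BASE_URL}/datasets/HBM123.ABCD.456/retract',
                    headers=HEADERS,
                    json={'retraction_reason': 'Data quality issue found after publication',
                          'sub_status': 'Retracted'})
resp.raise_for_status()
print(resp.json())  # the updated, normalized dataset details

# List every revision in this dataset's chain, newest first
resp = requests.get(f'{BASE_URL}/datasets/HBM123.ABCD.456/revisions',
                    headers=HEADERS)
print(resp.json())  # list of dicts: revision number, uuid, optional full dataset
```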
17 changes: 16 additions & 1 deletion src/schema/provenance_schema.yaml
@@ -372,7 +372,22 @@ ENTITIES:
       # The updated_peripherally tag is a temporary measure to correctly handle any attributes
       # which are potentially updated by multiple triggers
       updated_peripherally: true
-
+      retraction_reason:
+        type: string
+        before_property_update_validators:
+          - validate_if_retraction_permitted
+          - validate_sub_status_provided
+        description: 'Information recorded about why the dataset was retracted.'
+      sub_status:
+        type: string
+        before_property_update_validators:
+          - validate_if_retraction_permitted
+          - validate_retraction_reason_provided
+          - validate_retracted_dataset_sub_status_value
+        description: 'A sub-status provided to further define the status. The only currently allowable value is "Retracted".'
+      provider_info:
+        type: string
+        description: 'Information recorded about the data provider before an analysis pipeline is run on the data.'
 
 ############################################# Donor #############################################
 Donor:
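The two mirror-image validators declared above enforce the pairing rule described in the retract endpoint's docstring. A minimal sketch of what they might look like follows; this is an assumption, not the repo's actual implementation, but the five-argument signature matches the property-level validator call in schema_manager.py, and raising ValueError matches what app.py catches and turns into a 400.

```python
# Minimal sketch (assumed implementation, not from this PR) of the cross-field
# validators named in the yaml above. The signature mirrors
# validator_method_to_call(key, normalized_entity_type, request,
# existing_data_dict, new_data_dict) in schema_manager.py.

def validate_sub_status_provided(property_key, normalized_entity_type,
                                 request, existing_data_dict, new_data_dict):
    # 'retraction_reason' may only be updated together with 'sub_status'
    if 'sub_status' not in new_data_dict:
        raise ValueError("Missing sub_status field when retraction_reason is provided")


def validate_retraction_reason_provided(property_key, normalized_entity_type,
                                        request, existing_data_dict, new_data_dict):
    # ...and vice versa: 'sub_status' requires a 'retraction_reason'
    if 'retraction_reason' not in new_data_dict:
        raise ValueError("Missing retraction_reason field when sub_status is provided")
```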
16 changes: 8 additions & 8 deletions src/schema/schema_manager.py
@@ -700,10 +700,10 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
     One of the validator types: before_entity_create_validator
 normalized_entity_type : str
     One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload
-request_headers: Flask request.headers object, behaves like a dict
-    The instance of Flask request.headers passed in from application request
+request: Flask request object
+    The instance of Flask request passed in from application request
 """
-def execute_entity_level_validator(validator_type, normalized_entity_type, request_headers):
+def execute_entity_level_validator(validator_type, normalized_entity_type, request):
     global _schema
 
     # A bit of validation
@@ -722,7 +722,7 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque
 
         logger.debug(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}")
 
-        validator_method_to_call(normalized_entity_type, request_headers)
+        validator_method_to_call(normalized_entity_type, request)
     except schema_errors.MissingApplicationHeaderException as e:
         raise schema_errors.MissingApplicationHeaderException(e)
     except schema_errors.InvalidApplicationHeaderException as e:
Expand All @@ -743,14 +743,14 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque
For now only: before_property_update_validators (support multiple validators)
normalized_entity_type : str
One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload
request_headers: Flask request.headers object, behaves like a dict
The instance of Flask request.headers passed in from application request
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
The json data in request body, already after the regular validations
"""
def execute_property_level_validators(validator_type, normalized_entity_type, request_headers, existing_data_dict, new_data_dict):
def execute_property_level_validators(validator_type, normalized_entity_type, request, existing_data_dict, new_data_dict):
global _schema

schema_section = None
@@ -774,7 +774,7 @@ def execute_property_level_validators(validator_type, normalized_entity_type, re
 
                 logger.debug(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type} on property {key}")
 
-                validator_method_to_call(key, normalized_entity_type, request_headers, existing_data_dict, new_data_dict)
+                validator_method_to_call(key, normalized_entity_type, request, existing_data_dict, new_data_dict)
             except schema_errors.MissingApplicationHeaderException as e:
                 raise schema_errors.MissingApplicationHeaderException(e)
             except schema_errors.InvalidApplicationHeaderException as e:
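Because the validators now receive the whole Flask request object rather than just its headers, any caller outside a live request (for example a unit test) needs a request context. A speculative sketch under that assumption; the test set-up, import path, and placeholder data are not part of this PR.

```python
# Speculative test sketch (not from this PR): invoking the updated validator
# entry point with a full Flask request via a test request context. Assumes
# schema_manager has already been initialized with the schema yaml and its
# service connections; the import path mirrors the repo layout.
from flask import Flask, request

from schema import schema_manager

app = Flask(__name__)

# Placeholder stand-in for the neo4j entity dict
existing_entity_dict = {'entity_type': 'Dataset', 'status': 'Published'}

with app.test_request_context('/datasets/some-uuid/retract',
                               method='PUT',
                               headers={'Authorization': 'Bearer <token>'},
                               json={'retraction_reason': 'test',
                                     'sub_status': 'Retracted'}):
    schema_manager.execute_property_level_validators(
        'before_property_update_validators',
        'Dataset',
        request,            # the whole request object, not request.headers
        existing_entity_dict,
        request.get_json())
```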
1 change: 0 additions & 1 deletion src/schema/schema_triggers.py
@@ -662,7 +662,6 @@ def update_dataset_and_ancestors_data_access_level(property_key, normalized_type
     except TransactionError:
         # No need to log
         raise
-
 
 """
 Trigger event method of getting a list of collections for this new Dataset