Merged

Changes from all commits (17 commits)
40cc7e5
Added 3 new properties to the dataset section in provenance_schema.ya…
DerekFurstPitt Sep 14, 2021
10f2521
Added initial implementation of endpoint /datasets/<id>/revisions
DerekFurstPitt Sep 20, 2021
91c3f98
Incremental update. work in progress changes to /datasets/<id>/revisi…
DerekFurstPitt Sep 20, 2021
9523a74
Complete implementation of datasets/<id>/revisions minus authorizatio…
DerekFurstPitt Sep 21, 2021
b707a70
Added authorization code. Now after the initial check that the given …
DerekFurstPitt Sep 21, 2021
fbbc3ba
Reverting a number of small changes that were only necessary for loca…
DerekFurstPitt Sep 22, 2021
372f89e
Merge remote-tracking branch 'origin/test-release' into Derek-Furst/d…
DerekFurstPitt Sep 22, 2021
b76f7fb
Initial implementation of datasets/<id>/retract
DerekFurstPitt Sep 22, 2021
0643f55
Separating revisions and retract methods into 2 separate branches. Th…
DerekFurstPitt Sep 23, 2021
381091f
Added some lines to further normalize the outgoing json. Still a work…
DerekFurstPitt Sep 23, 2021
6786bc1
Modified provenance schema and app.py and schema_triggers to use trig…
DerekFurstPitt Sep 24, 2021
3acc46c
Dataset retraction tweaks
yuanzhou Sep 27, 2021
b2a1ff9
Sync with test-release
yuanzhou Sep 27, 2021
e29fccd
Add schema validators in API call
yuanzhou Sep 27, 2021
42aae30
Add validator to allow retraction on Published dataset only
yuanzhou Sep 27, 2021
f4ad9da
Pass in request instead of request.headers for schema validators
yuanzhou Sep 28, 2021
0ec6050
Remove unnecessary application-level check
yuanzhou Sep 28, 2021
97 changes: 90 additions & 7 deletions src/app.py
@@ -102,10 +102,9 @@ def http_internal_server_error(e):
 # This neo4j_driver_instance will be used for application-specific neo4j queries
 # as well as being passed to the schema_manager
 try:
-    neo4j_driver_instance = neo4j_driver.instance(app.config['NEO4J_URI'],
-                                                  app.config['NEO4J_USERNAME'],
+    neo4j_driver_instance = neo4j_driver.instance(app.config['NEO4J_URI'],
+                                                  app.config['NEO4J_USERNAME'],
                                                   app.config['NEO4J_PASSWORD'])
-
     logger.info("Initialized neo4j_driver module successfully :)")
 except Exception:
     msg = "Failed to initialize the neo4j_driver module"
@@ -794,7 +793,7 @@ def create_entity(entity_type):
     # Execute entity level validator defined in schema yaml before entity creation
     # Currently only Dataset and Upload creation require an application header
     try:
-        schema_manager.execute_entity_level_validator('before_entity_create_validator', normalized_entity_type, request.headers)
+        schema_manager.execute_entity_level_validator('before_entity_create_validator', normalized_entity_type, request)
     except schema_errors.MissingApplicationHeaderException as e:
         bad_request_error(e)
     except schema_errors.InvalidApplicationHeaderException as e:
@@ -997,7 +996,7 @@ def update_entity(id):
 
     # Execute property level validators defined in schema yaml before entity property update
     try:
-        schema_manager.execute_property_level_validators('before_property_update_validators', normalized_entity_type, request.headers, entity_dict, json_data_dict)
+        schema_manager.execute_property_level_validators('before_property_update_validators', normalized_entity_type, request, entity_dict, json_data_dict)
     except (schema_errors.MissingApplicationHeaderException,
             schema_errors.InvalidApplicationHeaderException,
             KeyError,
@@ -1454,7 +1453,7 @@ def get_previous_revisions(id):
     # Get user token from Authorization header
     user_token = get_user_token(request)
 
-    # Make sure the id exists in uuid-api and 
+    # Make sure the id exists in uuid-api and
     # the corresponding entity also exists in neo4j
     entity_dict = query_target_entity(id, user_token)
     uuid = entity_dict['uuid']
@@ -1954,14 +1953,98 @@ def get_dataset_revision_number(id):
     return jsonify(revision_number)
 
 
+"""
+Retract a published dataset with a retraction reason and sub status
+
+Takes as input a json body with required fields "retraction_reason" and "sub_status".
+Authorization is handled by the gateway. Only a token of the HuBMAP-Data-Admin group can use this call.
+
+Technically, the same can be achieved by making a PUT call to the generic entity update endpoint
+using a HuBMAP-Data-Admin group token. But doing this is strongly discouraged because we would
+need to add more validators to ensure that when "retraction_reason" is provided, there must be a
+"sub_status" field and vice versa. So consider this call a special use case of entity update.
+
+Parameters
+----------
+id : str
+    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target dataset
+
+Returns
+-------
+dict
+    The updated dataset details
+"""
+@app.route('/datasets/<id>/retract', methods=['PUT'])
+def retract_dataset(id):
+    # Always expect a json body
+    require_json(request)
+
+    # Parse incoming json string into json data (python dict object)
+    json_data_dict = request.get_json()
+
+    # Use the application-level validations below to avoid complicating schema validators.
+    # 'retraction_reason' and 'sub_status' are the only required/allowed fields; no other fields are allowed.
+    # Must enforce this rule, otherwise we would need to run after-update triggers if any other fields
+    # got passed in (updating other fields should be done using the generic entity update call)
+    if 'retraction_reason' not in json_data_dict:
+        bad_request_error("Missing required field: retraction_reason")
+
+    if 'sub_status' not in json_data_dict:
+        bad_request_error("Missing required field: sub_status")
+
+    if len(json_data_dict) > 2:
+        bad_request_error("Only retraction_reason and sub_status are allowed fields")
+
+    # Must be a HuBMAP-Data-Admin group token
+    token = get_user_token(request)
+
+    # Retrieve the neo4j data for the entity with the supplied id.
+    # The normalized entity type of this entity is then checked to be Dataset:
+    # if the entity is not a dataset, or the dataset is not published, it cannot be retracted
+    entity_dict = query_target_entity(id, token)
+    normalized_entity_type = entity_dict['entity_type']
+
+    # A bit more application-level validation
+    if normalized_entity_type != 'Dataset':
+        bad_request_error("The entity of given id is not a Dataset")
+
+    # Validate request json against the yaml schema
+    # The given value of 'sub_status' is validated at this step
+    try:
+        schema_manager.validate_json_data_against_schema(json_data_dict, normalized_entity_type, existing_entity_dict = entity_dict)
+    except schema_errors.SchemaValidationException as e:
+        # No need to log the validation errors
+        bad_request_error(str(e))
+
+    # Execute property level validators defined in schema yaml before entity property update
+    try:
+        schema_manager.execute_property_level_validators('before_property_update_validators', normalized_entity_type, request, entity_dict, json_data_dict)
+    except (schema_errors.MissingApplicationHeaderException,
+            schema_errors.InvalidApplicationHeaderException,
+            KeyError,
+            ValueError) as e:
+        bad_request_error(e)
+
+    # No need to call after_update() afterwards because retraction doesn't trigger any after_update_trigger methods
+    merged_updated_dict = update_entity_details(request, normalized_entity_type, token, json_data_dict, entity_dict)
+
+    complete_dict = schema_manager.get_complete_entity_result(token, merged_updated_dict)
+
+    # Will also filter the result based on schema
+    normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict)
+
+    # Also reindex the updated entity node in elasticsearch via search-api
+    reindex_entity(entity_dict['uuid'], token)
+
+    return jsonify(normalized_complete_dict)
+
 """
 Retrieve a list of all revisions of a dataset from the id of any dataset in the chain.
 E.g. if there are 5 revisions, and the id for revision 4 is given, a list of revisions
 1-5 will be returned in reverse order (newest first). Non-public access is only required to
 retrieve information on non-published datasets. Output will be a list of dictionaries; each
 dictionary contains the dataset revision number, its uuid, and optionally the complete dataset.
 """
 
 @app.route('/datasets/<id>/revisions', methods=['GET'])
 def get_revisions_list(id):
     # Token is not required, but if an invalid token provided,
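For reference, here is a hedged client-side sketch of how the two endpoints above might be called. It is not part of this PR: the base URL, dataset ID, and token are placeholders, and only the request/response contract follows the docstrings.

```python
# Hypothetical client sketch (not from this PR). BASE_URL, the dataset ID and
# the token are assumptions; the field names follow the endpoint docstrings.
import requests

BASE_URL = 'https://entity.api.hubmapconsortium.org'  # assumed deployment URL
HEADERS = {'Authorization': 'Bearer <HuBMAP-Data-Admin group token>'}

# Retract a published dataset: exactly these two fields, no others allowed
resp = requests.put(f'{BASE_URL}/datasets/HBM123.ABCD.456/retract',
                    headers=HEADERS,
                    json={'retraction_reason': 'Data quality issue found after publication',
                          'sub_status': 'Retracted'})
resp.raise_for_status()
print(resp.json())  # the updated, normalized dataset details

# List every revision in this dataset's chain, newest first
resp = requests.get(f'{BASE_URL}/datasets/HBM123.ABCD.456/revisions',
                    headers=HEADERS)
print(resp.json())  # list of dicts: revision number, uuid, optional full dataset
```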
17 changes: 16 additions & 1 deletion src/schema/provenance_schema.yaml
@@ -372,7 +372,22 @@ ENTITIES:
       # The updated_peripherally tag is a temporary measure to correctly handle any attributes
       # which are potentially updated by multiple triggers
       updated_peripherally: true
-
+      retraction_reason:
+        type: string
+        before_property_update_validators:
+          - validate_if_retraction_permitted
+          - validate_sub_status_provided
+        description: 'Information recorded about why the dataset was retracted.'
+      sub_status:
+        type: string
+        before_property_update_validators:
+          - validate_if_retraction_permitted
+          - validate_retraction_reason_provided
+          - validate_retracted_dataset_sub_status_value
+        description: 'A sub-status provided to further define the status. The only currently allowable value is "Retracted".'
+      provider_info:
+        type: string
+        description: 'Information recorded about the data provider before an analysis pipeline is run on the data.'
 
 ############################################# Donor #############################################
 Donor:
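The two mirror-image validators declared above enforce the pairing rule described in the retract endpoint's docstring. A minimal sketch of what they might look like follows; this is an assumption, not the repo's actual implementation, but the five-argument signature matches the property-level validator call in schema_manager.py, and raising ValueError matches what app.py catches and turns into a 400.

```python
# Minimal sketch (assumed implementation, not from this PR) of the cross-field
# validators named in the yaml above. The signature mirrors
# validator_method_to_call(key, normalized_entity_type, request,
# existing_data_dict, new_data_dict) in schema_manager.py.

def validate_sub_status_provided(property_key, normalized_entity_type,
                                 request, existing_data_dict, new_data_dict):
    # 'retraction_reason' may only be updated together with 'sub_status'
    if 'sub_status' not in new_data_dict:
        raise ValueError("Missing sub_status field when retraction_reason is provided")


def validate_retraction_reason_provided(property_key, normalized_entity_type,
                                        request, existing_data_dict, new_data_dict):
    # ...and vice versa: 'sub_status' requires a 'retraction_reason'
    if 'retraction_reason' not in new_data_dict:
        raise ValueError("Missing retraction_reason field when sub_status is provided")
```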
16 changes: 8 additions & 8 deletions src/schema/schema_manager.py
@@ -700,10 +700,10 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
     One of the validator types: before_entity_create_validator
 normalized_entity_type : str
     One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload
-request_headers: Flask request.headers object, behaves like a dict
-    The instance of Flask request.headers passed in from application request
+request: Flask request object
+    The instance of Flask request passed in from application request
 """
-def execute_entity_level_validator(validator_type, normalized_entity_type, request_headers):
+def execute_entity_level_validator(validator_type, normalized_entity_type, request):
     global _schema
 
     # A bit of validation
@@ -722,7 +722,7 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque
 
         logger.debug(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}")
 
-        validator_method_to_call(normalized_entity_type, request_headers)
+        validator_method_to_call(normalized_entity_type, request)
     except schema_errors.MissingApplicationHeaderException as e:
         raise schema_errors.MissingApplicationHeaderException(e)
     except schema_errors.InvalidApplicationHeaderException as e:
Expand All @@ -743,14 +743,14 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque
For now only: before_property_update_validators (support multiple validators)
normalized_entity_type : str
One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload
request_headers: Flask request.headers object, behaves like a dict
The instance of Flask request.headers passed in from application request
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
The json data in request body, already after the regular validations
"""
def execute_property_level_validators(validator_type, normalized_entity_type, request_headers, existing_data_dict, new_data_dict):
def execute_property_level_validators(validator_type, normalized_entity_type, request, existing_data_dict, new_data_dict):
global _schema

schema_section = None
@@ -774,7 +774,7 @@ def execute_property_level_validators(validator_type, normalized_entity_type, re
 
                 logger.debug(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type} on property {key}")
 
-                validator_method_to_call(key, normalized_entity_type, request_headers, existing_data_dict, new_data_dict)
+                validator_method_to_call(key, normalized_entity_type, request, existing_data_dict, new_data_dict)
             except schema_errors.MissingApplicationHeaderException as e:
                 raise schema_errors.MissingApplicationHeaderException(e)
             except schema_errors.InvalidApplicationHeaderException as e:
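Because the validators now receive the whole Flask request object rather than just its headers, any caller outside a live request (for example a unit test) needs a request context. A speculative sketch under that assumption; the test set-up, import path, and placeholder data are not part of this PR.

```python
# Speculative test sketch (not from this PR): invoking the updated validator
# entry point with a full Flask request via a test request context. Assumes
# schema_manager has already been initialized with the schema yaml and its
# service connections; the import path mirrors the repo layout.
from flask import Flask, request

from schema import schema_manager

app = Flask(__name__)

# Placeholder stand-in for the neo4j entity dict
existing_entity_dict = {'entity_type': 'Dataset', 'status': 'Published'}

with app.test_request_context('/datasets/some-uuid/retract',
                               method='PUT',
                               headers={'Authorization': 'Bearer <token>'},
                               json={'retraction_reason': 'test',
                                     'sub_status': 'Retracted'}):
    schema_manager.execute_property_level_validators(
        'before_property_update_validators',
        'Dataset',
        request,            # the whole request object, not request.headers
        existing_entity_dict,
        request.get_json())
```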
1 change: 0 additions & 1 deletion src/schema/schema_triggers.py
@@ -662,7 +662,6 @@ def update_dataset_and_ancestors_data_access_level(property_key, normalized_type
     except TransactionError:
         # No need to log
         raise
-
 
 """
 Trigger event method of getting a list of collections for this new Dataset